Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/.gitignore | 1
-rw-r--r--  arch/x86/kernel/Makefile | 4
-rw-r--r--  arch/x86/kernel/Makefile_32 | 10
-rw-r--r--  arch/x86/kernel/Makefile_64 | 27
-rw-r--r--  arch/x86/kernel/acpi/Makefile | 10
-rw-r--r--  arch/x86/kernel/acpi/Makefile_32 | 10
-rw-r--r--  arch/x86/kernel/acpi/Makefile_64 | 7
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 10
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 4
-rw-r--r--  arch/x86/kernel/acpi/earlyquirk_32.c | 84
-rw-r--r--  arch/x86/kernel/acpi/processor.c | 3
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S | 12
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S | 133
-rw-r--r--  arch/x86/kernel/alternative.c | 71
-rw-r--r--  arch/x86/kernel/aperture_64.c | 15
-rw-r--r--  arch/x86/kernel/apic_32.c | 5
-rw-r--r--  arch/x86/kernel/apic_64.c | 18
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 22
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 38
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 24
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 17
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 4
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig | 69
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/elanfreq.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 8
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longrun.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 12
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 10
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 17
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 89
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile | 8
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_32.c (renamed from arch/x86/kernel/cpu/mcheck/mce.c) | 0
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c (renamed from arch/x86/kernel/mce_64.c) | 213
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c (renamed from arch/x86/kernel/mce_amd_64.c) | 11
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c (renamed from arch/x86/kernel/mce_intel_64.c) | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p4.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 12
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cyrix.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 16
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 30
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 24
-rw-r--r--  arch/x86/kernel/cpufreq/Kconfig | 108
-rw-r--r--  arch/x86/kernel/cpuid.c | 60
-rw-r--r--  arch/x86/kernel/crash.c (renamed from arch/x86/kernel/crash_32.c) | 11
-rw-r--r--  arch/x86/kernel/crash_64.c | 135
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 1
-rw-r--r--  arch/x86/kernel/e820_32.c | 23
-rw-r--r--  arch/x86/kernel/e820_64.c | 44
-rw-r--r--  arch/x86/kernel/early-quirks.c (renamed from arch/x86/kernel/early-quirks_64.c) | 29
-rw-r--r--  arch/x86/kernel/early_printk.c | 15
-rw-r--r--  arch/x86/kernel/efi_32.c | 19
-rw-r--r--  arch/x86/kernel/entry_32.S | 2
-rw-r--r--  arch/x86/kernel/entry_64.S | 2
-rw-r--r--  arch/x86/kernel/genapic_64.c | 17
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 3
-rw-r--r--  arch/x86/kernel/head64.c | 22
-rw-r--r--  arch/x86/kernel/head_32.S | 72
-rw-r--r--  arch/x86/kernel/hpet.c | 5
-rw-r--r--  arch/x86/kernel/i386_ksyms_32.c | 1
-rw-r--r--  arch/x86/kernel/i387_64.c | 9
-rw-r--r--  arch/x86/kernel/i8253.c | 2
-rw-r--r--  arch/x86/kernel/i8259_32.c | 9
-rw-r--r--  arch/x86/kernel/i8259_64.c | 6
-rw-r--r--  arch/x86/kernel/init_task.c (renamed from arch/x86/kernel/init_task_32.c) | 11
-rw-r--r--  arch/x86/kernel/init_task_64.c | 54
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 26
-rw-r--r--  arch/x86/kernel/io_apic_64.c | 74
-rw-r--r--  arch/x86/kernel/irq_32.c | 49
-rw-r--r--  arch/x86/kernel/irq_64.c | 48
-rw-r--r--  arch/x86/kernel/kprobes_32.c | 17
-rw-r--r--  arch/x86/kernel/kprobes_64.c | 15
-rw-r--r--  arch/x86/kernel/ldt_32.c | 14
-rw-r--r--  arch/x86/kernel/ldt_64.c | 14
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 29
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 32
-rw-r--r--  arch/x86/kernel/microcode.c | 6
-rw-r--r--  arch/x86/kernel/mpparse_32.c | 2
-rw-r--r--  arch/x86/kernel/mpparse_64.c | 17
-rw-r--r--  arch/x86/kernel/msr.c | 39
-rw-r--r--  arch/x86/kernel/nmi_32.c | 4
-rw-r--r--  arch/x86/kernel/paravirt_32.c | 224
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 77
-rw-r--r--  arch/x86/kernel/pci-dma_32.c | 1
-rw-r--r--  arch/x86/kernel/pci-dma_64.c | 25
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 94
-rw-r--r--  arch/x86/kernel/pci-nommu_64.c | 11
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 2
-rw-r--r--  arch/x86/kernel/process_32.c | 56
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/ptrace_32.c | 11
-rw-r--r--  arch/x86/kernel/ptrace_64.c | 9
-rw-r--r--  arch/x86/kernel/quirks.c | 165
-rw-r--r--  arch/x86/kernel/reboot_64.c | 5
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c | 8
-rw-r--r--  arch/x86/kernel/setup64.c | 14
-rw-r--r--  arch/x86/kernel/setup_32.c | 129
-rw-r--r--  arch/x86/kernel/setup_64.c | 143
-rw-r--r--  arch/x86/kernel/signal_32.c | 8
-rw-r--r--  arch/x86/kernel/signal_64.c | 2
-rw-r--r--  arch/x86/kernel/smp_32.c | 12
-rw-r--r--  arch/x86/kernel/smp_64.c | 122
-rw-r--r--  arch/x86/kernel/smpboot_32.c | 163
-rw-r--r--  arch/x86/kernel/smpboot_64.c | 131
-rw-r--r--  arch/x86/kernel/stacktrace.c | 2
-rw-r--r--  arch/x86/kernel/summit_32.c | 2
-rw-r--r--  arch/x86/kernel/suspend_64.c | 120
-rw-r--r--  arch/x86/kernel/suspend_asm_64.S | 117
-rw-r--r--  arch/x86/kernel/sys_i386_32.c | 2
-rw-r--r--  arch/x86/kernel/tce_64.c | 4
-rw-r--r--  arch/x86/kernel/topology.c | 6
-rw-r--r--  arch/x86/kernel/trampoline_32.S | 4
-rw-r--r--  arch/x86/kernel/trampoline_64.S | 7
-rw-r--r--  arch/x86/kernel/traps_32.c | 97
-rw-r--r--  arch/x86/kernel/traps_64.c | 24
-rw-r--r--  arch/x86/kernel/tsc_32.c | 57
-rw-r--r--  arch/x86/kernel/tsc_64.c | 4
-rw-r--r--  arch/x86/kernel/vmi_32.c | 201
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 32
133 files changed, 2380 insertions, 1873 deletions
diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore
index 40836ad9079..4ea38a39aed 100644
--- a/arch/x86/kernel/.gitignore
+++ b/arch/x86/kernel/.gitignore
@@ -1 +1,2 @@
vsyscall.lds
+vsyscall_32.lds
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 45855c97923..38573340b14 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -3,3 +3,7 @@ include ${srctree}/arch/x86/kernel/Makefile_32
else
include ${srctree}/arch/x86/kernel/Makefile_64
endif
+
+# Workaround to delete .lds files with make clean
+# The problem is that we do not enter Makefile_32 with make clean.
+clean-files := vsyscall*.lds vsyscall*.so
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index 7ff02063b85..a7bc93c2766 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -2,7 +2,8 @@
# Makefile for the linux kernel.
#
-extra-y := head_32.o init_task_32.o vmlinux.lds
+extra-y := head_32.o init_task.o vmlinux.lds
+CPPFLAGS_vmlinux.lds += -Ui386
obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
@@ -17,6 +18,7 @@ obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
+obj-$(CONFIG_PCI) += early-quirks.o
obj-$(CONFIG_APM) += apm_32.o
obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o
obj-$(CONFIG_SMP) += smpcommon_32.o
@@ -25,7 +27,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o
obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
-obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash_32.o
+obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_32.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
@@ -51,7 +53,7 @@ obj-$(CONFIG_SCx200) += scx200_32.o
# We must build both images before we can assemble it.
# Note: kbuild does not track this dependency due to usage of .incbin
$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so
-targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
+targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so)
targets += vsyscall-note_32.o vsyscall_32.lds
# The DSO images are built using a special linker script.
@@ -59,7 +61,7 @@ quiet_cmd_syscall = SYSCALL $@
cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
-Wl,-T,$(filter-out FORCE,$^) -o $@
-export CPPFLAGS_vsyscall_32.lds += -P -C -U$(ARCH)
+export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386
vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \
$(call ld-option, -Wl$(comma)--hash-style=sysv)
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 43da66213a4..5a88890d8ee 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -2,34 +2,32 @@
# Makefile for the linux kernel.
#
-extra-y := head_64.o head64.o init_task_64.o vmlinux.lds
+extra-y := head_64.o head64.o init_task.o vmlinux.lds
+CPPFLAGS_vmlinux.lds += -Ux86_64
EXTRA_AFLAGS := -traditional
+
obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \
x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \
setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \
pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \
- perfctr-watchdog.o i8253.o
+ i8253.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_X86_MCE) += mce_64.o therm_throt.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
-obj-$(CONFIG_MTRR) += cpu/mtrr/
-obj-$(CONFIG_ACPI) += acpi/
+obj-y += cpu/
+obj-y += acpi/
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o
obj-y += apic_64.o nmi_64.o
obj-y += io_apic_64.o mpparse_64.o genapic_64.o genapic_flat_64.o
-obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash_64.o
+obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_64.o
obj-$(CONFIG_PM) += suspend_64.o
obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
-obj-$(CONFIG_CPU_FREQ) += cpu/cpufreq/
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_IOMMU) += pci-gart_64.o aperture_64.o
+obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
obj-$(CONFIG_KPROBES) += kprobes_64.o
@@ -39,16 +37,9 @@ obj-$(CONFIG_K8_NB) += k8.o
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_MODULES) += module_64.o
-obj-$(CONFIG_PCI) += early-quirks_64.o
+obj-$(CONFIG_PCI) += early-quirks.o
obj-y += topology.o
-obj-y += intel_cacheinfo.o
-obj-y += addon_cpuid_features.o
obj-y += pcspeaker.o
CFLAGS_vsyscall_64.o := $(PROFILING) -g0
-
-therm_throt-y += cpu/mcheck/therm_throt.o
-intel_cacheinfo-y += cpu/intel_cacheinfo.o
-addon_cpuid_features-y += cpu/addon_cpuid_features.o
-perfctr-watchdog-y += cpu/perfctr-watchdog.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 3d567193954..1351c3982ee 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,5 +1,7 @@
-ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/kernel/acpi/Makefile_32
-else
-include ${srctree}/arch/x86/kernel/acpi/Makefile_64
+obj-$(CONFIG_ACPI) += boot.o
+obj-$(CONFIG_ACPI_SLEEP) += sleep_$(BITS).o wakeup_$(BITS).o
+
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y += cstate.o processor.o
endif
+
diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32
deleted file mode 100644
index a4852a2e919..00000000000
--- a/arch/x86/kernel/acpi/Makefile_32
+++ /dev/null
@@ -1,10 +0,0 @@
-obj-$(CONFIG_ACPI) += boot.o
-ifneq ($(CONFIG_PCI),)
-obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o
-endif
-obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += cstate.o processor.o
-endif
-
diff --git a/arch/x86/kernel/acpi/Makefile_64 b/arch/x86/kernel/acpi/Makefile_64
deleted file mode 100644
index 629425bc002..00000000000
--- a/arch/x86/kernel/acpi/Makefile_64
+++ /dev/null
@@ -1,7 +0,0 @@
-obj-y := boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep_64.o wakeup_64.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += processor.o cstate.o
-endif
-
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index afd2afe9102..289247d974c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -99,7 +99,7 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
/*
* The default interrupt routing model is PIC (8259). This gets
- * overriden if IOAPICs are enumerated (below).
+ * overridden if IOAPICs are enumerated (below).
*/
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
@@ -414,8 +414,8 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end
*
* Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
* for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
- * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0)
- * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0)
+ * ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
+ * ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
*/
void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
@@ -427,7 +427,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
old = inb(0x4d0) | (inb(0x4d1) << 8);
/*
- * If we use ACPI to set PCI irq's, then we should clear ELCR
+ * If we use ACPI to set PCI IRQs, then we should clear ELCR
* since we will set it correctly as we enable the PCI irq
* routing.
*/
@@ -555,7 +555,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
int acpi_unmap_lsapic(int cpu)
{
- x86_cpu_to_apicid[cpu] = -1;
+ per_cpu(x86_cpu_to_apicid, cpu) = -1;
cpu_clear(cpu, cpu_present_map);
num_processors--;
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 2d39f55d29a..10b67170b13 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -29,7 +29,7 @@
void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
unsigned int cpu)
{
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
flags->bm_check = 0;
if (num_online_cpus() == 1)
@@ -72,7 +72,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
struct acpi_processor_cx *cx, struct acpi_power_register *reg)
{
struct cstate_entry *percpu_entry;
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
cpumask_t saved_mask;
int retval;
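
[Editorial note: the cpu_data changes above recur throughout this series. Direct indexing into a global cpuinfo array (cpu_data + cpu) is replaced by a cpu_data(cpu) accessor macro, so the backing storage can later be moved (for example into per-CPU areas) without touching every caller again. A minimal userspace sketch of the idea follows; the struct and macro bodies are hypothetical stand-ins, not the kernel's real definitions.]

/* sketch only: stand-in for struct cpuinfo_x86 */
struct cpuinfo_example { int vendor; };

static struct cpuinfo_example cpu_info_storage[4];

/* old style: callers do pointer arithmetic on the array directly */
#define old_cpu_data        cpu_info_storage           /* c = cpu_data + cpu  */

/* new style: an accessor macro hides where the data actually lives */
#define cpu_data(cpu)       (cpu_info_storage[(cpu)])  /* c = &cpu_data(cpu)  */

int main(void)
{
	struct cpuinfo_example *old = old_cpu_data + 1;
	struct cpuinfo_example *new = &cpu_data(1);

	return old != new;  /* 0: both spellings name the same object */
}
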
diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c
deleted file mode 100644
index 23f78efc577..00000000000
--- a/arch/x86/kernel/acpi/earlyquirk_32.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Do early PCI probing for bug detection when the main PCI subsystem is
- * not up yet.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/acpi.h>
-
-#include <asm/pci-direct.h>
-#include <asm/acpi.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_ACPI
-
-static int __init nvidia_hpet_check(struct acpi_table_header *header)
-{
- return 0;
-}
-#endif
-
-static int __init check_bridge(int vendor, int device)
-{
-#ifdef CONFIG_ACPI
- static int warned;
- /* According to Nvidia all timer overrides are bogus unless HPET
- is enabled. */
- if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
- if (!warned && acpi_table_parse(ACPI_SIG_HPET,
- nvidia_hpet_check)) {
- warned = 1;
- acpi_skip_timer_override = 1;
- printk(KERN_INFO "Nvidia board "
- "detected. Ignoring ACPI "
- "timer override.\n");
- printk(KERN_INFO "If you got timer trouble "
- "try acpi_use_timer_override\n");
-
- }
- }
-#endif
- if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) {
- timer_over_8254 = 0;
- printk(KERN_INFO "ATI board detected. Disabling timer routing "
- "over 8254.\n");
- }
- return 0;
-}
-
-void __init check_acpi_pci(void)
-{
- int num, slot, func;
-
- /* Assume the machine supports type 1. If not it will
- always read ffffffff and should not have any side effect.
- Actually a few buggy systems can machine check. Allow the user
- to disable it by command line option at least -AK */
- if (!early_pci_allowed())
- return;
-
- /* Poor man's PCI discovery */
- for (num = 0; num < 32; num++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- u32 class;
- u32 vendor;
- class = read_pci_config(num, slot, func,
- PCI_CLASS_REVISION);
- if (class == 0xffffffff)
- break;
-
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
- continue;
-
- vendor = read_pci_config(num, slot, func,
- PCI_VENDOR_ID);
-
- if (check_bridge(vendor & 0xffff, vendor >> 16))
- return;
- }
-
- }
- }
-}
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index b54fded4983..f63e5ff0aca 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -62,8 +62,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
/* Initialize _PDC data based on the CPU vendor */
void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
{
- unsigned int cpu = pr->id;
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(pr->id);
pr->pdc = NULL;
if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index f22ba8534d2..a97313b1270 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -11,7 +11,7 @@
#
# If physical address of wakeup_code is 0x12345, BIOS should call us with
# cs = 0x1234, eip = 0x05
-#
+#
#define BEEP \
inb $97, %al; \
@@ -52,7 +52,6 @@ wakeup_code:
BEEP
1:
mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
- movw $0x0e00 + 'S', %fs:(0x12)
pushl $0 # Kill any dangerous flags
popfl
@@ -90,9 +89,6 @@ wakeup_code:
# make sure %cr4 is set correctly (features, etc)
movl real_save_cr4 - wakeup_code, %eax
movl %eax, %cr4
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'i', %fs:(0x12)
# need a gdt -- use lgdtl to force 32-bit operands, in case
# the GDT is located past 16 megabytes.
@@ -102,8 +98,6 @@ wakeup_code:
movl %eax, %cr0
jmp 1f
1:
- movw $0x0e00 + 'n', %fs:(0x14)
-
movl real_magic - wakeup_code, %eax
cmpl $0x12345678, %eax
jne bogus_real_magic
@@ -122,13 +116,11 @@ real_save_cr4: .long 0
real_magic: .long 0
video_mode: .long 0
realmode_flags: .long 0
-beep_flags: .long 0
real_efer_save_restore: .long 0
real_save_efer_edx: .long 0
real_save_efer_eax: .long 0
bogus_real_magic:
- movw $0x0e00 + 'B', %fs:(0x12)
jmp bogus_real_magic
/* This code uses an extended set of video mode numbers. These include:
@@ -194,7 +186,6 @@ wakeup_pmode_return:
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movw $0x0e00 + 'u', 0xb8016
# reload the gdt, as we need the full 32 bit address
lgdt saved_gdt
@@ -218,7 +209,6 @@ wakeup_pmode_return:
jmp *%eax
bogus_magic:
- movw $0x0e00 + 'B', 0xb8018
jmp bogus_magic
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8b4357e1efe..5ed3bc5c61d 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -4,6 +4,7 @@
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
+#include <asm/asm-offsets.h>
# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
#
@@ -41,7 +42,6 @@ wakeup_code:
# Running in *copy* of this code, somewhere in low 1MB.
- movb $0xa1, %al ; outb %al, $0x80
cli
cld
# setup data segment
@@ -65,11 +65,6 @@ wakeup_code:
cmpl $0x12345678, %eax
jne bogus_real_magic
- call verify_cpu # Verify the cpu supports long
- # mode
- testl %eax, %eax
- jnz no_longmode
-
testl $1, realmode_flags - wakeup_code
jz 1f
lcall $0xc000,$3
@@ -84,12 +79,6 @@ wakeup_code:
call mode_set
1:
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'L', %fs:(0x10)
-
- movb $0xa2, %al ; outb %al, $0x80
-
mov %ds, %ax # Find 32bit wakeup_code addr
movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
shll $4, %esi
@@ -117,14 +106,10 @@ wakeup_32_vector:
.code32
wakeup_32:
# Running in this code, but at low address; paging is not yet turned on.
- movb $0xa5, %al ; outb %al, $0x80
movl $__KERNEL_DS, %eax
movl %eax, %ds
- movw $0x0e00 + 'i', %ds:(0xb8012)
- movb $0xa8, %al ; outb %al, $0x80;
-
/*
* Prepare for entering 64bits mode
*/
@@ -200,16 +185,11 @@ wakeup_long64:
*/
lgdt cpu_gdt_descr
- movw $0x0e00 + 'n', %ds:(0xb8014)
- movb $0xa9, %al ; outb %al, $0x80
-
movq saved_magic, %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
jne bogus_64_magic
- movw $0x0e00 + 'u', %ds:(0xb8016)
-
nop
nop
movw $__KERNEL_DS, %ax
@@ -220,13 +200,11 @@ wakeup_long64:
movw %ax, %gs
movq saved_rsp, %rsp
- movw $0x0e00 + 'x', %ds:(0xb8018)
movq saved_rbx, %rbx
movq saved_rdi, %rdi
movq saved_rsi, %rsi
movq saved_rbp, %rbp
- movw $0x0e00 + '!', %ds:(0xb801a)
movq saved_rip, %rax
jmp *%rax
@@ -256,21 +234,12 @@ realmode_flags: .quad 0
.code16
bogus_real_magic:
- movb $0xba,%al ; outb %al,$0x80
jmp bogus_real_magic
.code64
bogus_64_magic:
- movb $0xb3,%al ; outb %al,$0x80
jmp bogus_64_magic
-.code16
-no_longmode:
- movb $0xbc,%al ; outb %al,$0x80
- jmp no_longmode
-
-#include "../verify_cpu_64.S"
-
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
* NORMAL_VGA (-1)
@@ -374,31 +343,32 @@ do_suspend_lowlevel:
xorl %eax, %eax
call save_processor_state
- movq %rsp, saved_context_esp(%rip)
- movq %rax, saved_context_eax(%rip)
- movq %rbx, saved_context_ebx(%rip)
- movq %rcx, saved_context_ecx(%rip)
- movq %rdx, saved_context_edx(%rip)
- movq %rbp, saved_context_ebp(%rip)
- movq %rsi, saved_context_esi(%rip)
- movq %rdi, saved_context_edi(%rip)
- movq %r8, saved_context_r08(%rip)
- movq %r9, saved_context_r09(%rip)
- movq %r10, saved_context_r10(%rip)
- movq %r11, saved_context_r11(%rip)
- movq %r12, saved_context_r12(%rip)
- movq %r13, saved_context_r13(%rip)
- movq %r14, saved_context_r14(%rip)
- movq %r15, saved_context_r15(%rip)
- pushfq ; popq saved_context_eflags(%rip)
+ movq $saved_context, %rax
+ movq %rsp, pt_regs_rsp(%rax)
+ movq %rbp, pt_regs_rbp(%rax)
+ movq %rsi, pt_regs_rsi(%rax)
+ movq %rdi, pt_regs_rdi(%rax)
+ movq %rbx, pt_regs_rbx(%rax)
+ movq %rcx, pt_regs_rcx(%rax)
+ movq %rdx, pt_regs_rdx(%rax)
+ movq %r8, pt_regs_r8(%rax)
+ movq %r9, pt_regs_r9(%rax)
+ movq %r10, pt_regs_r10(%rax)
+ movq %r11, pt_regs_r11(%rax)
+ movq %r12, pt_regs_r12(%rax)
+ movq %r13, pt_regs_r13(%rax)
+ movq %r14, pt_regs_r14(%rax)
+ movq %r15, pt_regs_r15(%rax)
+ pushfq
+ popq pt_regs_eflags(%rax)
movq $.L97, saved_rip(%rip)
- movq %rsp,saved_rsp
- movq %rbp,saved_rbp
- movq %rbx,saved_rbx
- movq %rdi,saved_rdi
- movq %rsi,saved_rsi
+ movq %rsp, saved_rsp
+ movq %rbp, saved_rbp
+ movq %rbx, saved_rbx
+ movq %rdi, saved_rdi
+ movq %rsi, saved_rsi
addq $8, %rsp
movl $3, %edi
@@ -409,32 +379,35 @@ do_suspend_lowlevel:
.L99:
.align 4
movl $24, %eax
- movw %ax, %ds
- movq saved_context+58(%rip), %rax
- movq %rax, %cr4
- movq saved_context+50(%rip), %rax
- movq %rax, %cr3
- movq saved_context+42(%rip), %rax
- movq %rax, %cr2
- movq saved_context+34(%rip), %rax
- movq %rax, %cr0
- pushq saved_context_eflags(%rip) ; popfq
- movq saved_context_esp(%rip), %rsp
- movq saved_context_ebp(%rip), %rbp
- movq saved_context_eax(%rip), %rax
- movq saved_context_ebx(%rip), %rbx
- movq saved_context_ecx(%rip), %rcx
- movq saved_context_edx(%rip), %rdx
- movq saved_context_esi(%rip), %rsi
- movq saved_context_edi(%rip), %rdi
- movq saved_context_r08(%rip), %r8
- movq saved_context_r09(%rip), %r9
- movq saved_context_r10(%rip), %r10
- movq saved_context_r11(%rip), %r11
- movq saved_context_r12(%rip), %r12
- movq saved_context_r13(%rip), %r13
- movq saved_context_r14(%rip), %r14
- movq saved_context_r15(%rip), %r15
+ movw %ax, %ds
+
+ /* We don't restore %rax, it must be 0 anyway */
+ movq $saved_context, %rax
+ movq saved_context_cr4(%rax), %rbx
+ movq %rbx, %cr4
+ movq saved_context_cr3(%rax), %rbx
+ movq %rbx, %cr3
+ movq saved_context_cr2(%rax), %rbx
+ movq %rbx, %cr2
+ movq saved_context_cr0(%rax), %rbx
+ movq %rbx, %cr0
+ pushq pt_regs_eflags(%rax)
+ popfq
+ movq pt_regs_rsp(%rax), %rsp
+ movq pt_regs_rbp(%rax), %rbp
+ movq pt_regs_rsi(%rax), %rsi
+ movq pt_regs_rdi(%rax), %rdi
+ movq pt_regs_rbx(%rax), %rbx
+ movq pt_regs_rcx(%rax), %rcx
+ movq pt_regs_rdx(%rax), %rdx
+ movq pt_regs_r8(%rax), %r8
+ movq pt_regs_r9(%rax), %r9
+ movq pt_regs_r10(%rax), %r10
+ movq pt_regs_r11(%rax), %r11
+ movq pt_regs_r12(%rax), %r12
+ movq pt_regs_r13(%rax), %r13
+ movq pt_regs_r14(%rax), %r14
+ movq pt_regs_r15(%rax), %r15
xorl %eax, %eax
addq $8, %rsp
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 11b03d3c6fd..d6405e0842b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -63,11 +63,11 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
-asm("\t.data\nintelnops: "
+asm("\t.section .rodata, \"a\"\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
-extern unsigned char intelnops[];
-static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
+extern const unsigned char intelnops[];
+static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
intelnops + 1,
@@ -81,11 +81,11 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
#endif
#ifdef K8_NOP1
-asm("\t.data\nk8nops: "
+asm("\t.section .rodata, \"a\"\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8);
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
+extern const unsigned char k8nops[];
+static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
k8nops + 1,
@@ -99,11 +99,11 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
#endif
#ifdef K7_NOP1
-asm("\t.data\nk7nops: "
+asm("\t.section .rodata, \"a\"\nk7nops: "
K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
K7_NOP7 K7_NOP8);
-extern unsigned char k7nops[];
-static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
+extern const unsigned char k7nops[];
+static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
k7nops + 1,
@@ -116,28 +116,49 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
};
#endif
+#ifdef P6_NOP1
+asm("\t.section .rodata, \"a\"\np6nops: "
+ P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
+ P6_NOP7 P6_NOP8);
+extern const unsigned char p6nops[];
+static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
+ NULL,
+ p6nops,
+ p6nops + 1,
+ p6nops + 1 + 2,
+ p6nops + 1 + 2 + 3,
+ p6nops + 1 + 2 + 3 + 4,
+ p6nops + 1 + 2 + 3 + 4 + 5,
+ p6nops + 1 + 2 + 3 + 4 + 5 + 6,
+ p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+#endif
+
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
-static inline unsigned char** find_nop_table(void)
+static inline const unsigned char*const * find_nop_table(void)
{
- return k8_nops;
+ return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
}
#else /* CONFIG_X86_64 */
-static struct nop {
+static const struct nop {
int cpuid;
- unsigned char **noptable;
+ const unsigned char *const *noptable;
} noptypes[] = {
{ X86_FEATURE_K8, k8_nops },
{ X86_FEATURE_K7, k7_nops },
+ { X86_FEATURE_P4, p6_nops },
+ { X86_FEATURE_P3, p6_nops },
{ -1, NULL }
};
-static unsigned char** find_nop_table(void)
+static const unsigned char*const * find_nop_table(void)
{
- unsigned char **noptable = intel_nops;
+ const unsigned char *const *noptable = intel_nops;
int i;
for (i = 0; noptypes[i].cpuid >= 0; i++) {
@@ -154,7 +175,7 @@ static unsigned char** find_nop_table(void)
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void add_nops(void *insns, unsigned int len)
{
- unsigned char **noptable = find_nop_table();
+ const unsigned char *const *noptable = find_nop_table();
while (len > 0) {
unsigned int noplen = len;
@@ -336,14 +357,14 @@ void alternatives_smp_switch(int smp)
if (smp) {
printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+ clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_lock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
} else {
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+ set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_unlock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
@@ -369,8 +390,8 @@ void apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insnbuf, p->instr, p->len);
- used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
- (unsigned long)p->instr, p->len);
+ used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
+ (unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
@@ -411,13 +432,10 @@ void __init alternative_instructions(void)
if (1 == num_possible_cpus()) {
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
- set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+ set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
alternatives_smp_unlock(__smp_locks, __smp_locks_end,
_text, _etext);
}
- free_init_pages("SMP alternatives",
- (unsigned long)__smp_locks,
- (unsigned long)__smp_locks_end);
} else {
alternatives_smp_module_add(NULL, "core kernel",
__smp_locks, __smp_locks_end,
@@ -428,6 +446,11 @@ void __init alternative_instructions(void)
apply_paravirt(__parainstructions, __parainstructions_end);
local_irq_restore(flags);
+ if (smp_alt_once)
+ free_init_pages("SMP alternatives",
+ (unsigned long)__smp_locks,
+ (unsigned long)__smp_locks_end);
+
restart_nmi();
#ifdef CONFIG_X86_MCE
restart_mce();
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 8f681cae7bf..5b6992799c9 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,14 +20,14 @@
#include <linux/ioport.h>
#include <asm/e820.h>
#include <asm/io.h>
-#include <asm/iommu.h>
+#include <asm/gart.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/k8.h>
-int iommu_aperture;
-int iommu_aperture_disabled __initdata = 0;
-int iommu_aperture_allowed __initdata = 0;
+int gart_iommu_aperture;
+int gart_iommu_aperture_disabled __initdata = 0;
+int gart_iommu_aperture_allowed __initdata = 0;
int fallback_aper_order __initdata = 1; /* 64MB */
int fallback_aper_force __initdata = 0;
@@ -204,14 +204,15 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
return 0;
}
-void __init iommu_hole_init(void)
+void __init gart_iommu_hole_init(void)
{
int fix, num;
u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
u64 aper_base, last_aper_base = 0;
int valid_agp = 0;
- if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed())
+ if (gart_iommu_aperture_disabled || !fix_aperture ||
+ !early_pci_allowed())
return;
printk(KERN_INFO "Checking aperture...\n");
@@ -222,7 +223,7 @@ void __init iommu_hole_init(void)
continue;
iommu_detected = 1;
- iommu_aperture = 1;
+ gart_iommu_aperture = 1;
aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
aper_size = (32 * 1024 * 1024) << aper_order;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 3d67ae18d76..08b07c17696 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -947,7 +947,7 @@ void __devinit setup_local_APIC(void)
* Set up LVT0, LVT1:
*
* set up through-local-APIC on the BP's LINT0. This is not
- * strictly necessery in pure symmetric-IO mode, but sometimes
+ * strictly necessary in pure symmetric-IO mode, but sometimes
* we delegate interrupts to the 8259A.
*/
/*
@@ -998,7 +998,7 @@ void __devinit setup_local_APIC(void)
} else {
if (esr_disable)
/*
- * Something untraceble is creating bad interrupts on
+ * Something untraceable is creating bad interrupts on
* secondary quads ... for the moment, just leave the
* ESR disabled - we can't do anything useful with the
* errors anyway - mbligh
@@ -1277,6 +1277,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
"should never happen.\n", smp_processor_id());
+ __get_cpu_var(irq_stat).irq_spurious_count++;
irq_exit();
}
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 09b82093bc7..f28ccb588fb 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -287,6 +287,20 @@ void disable_local_APIC(void)
apic_write(APIC_SPIV, value);
}
+void lapic_shutdown(void)
+{
+ unsigned long flags;
+
+ if (!cpu_has_apic)
+ return;
+
+ local_irq_save(flags);
+
+ disable_local_APIC();
+
+ local_irq_restore(flags);
+}
+
/*
* This is to verify that we're looking at a real local APIC.
* Check these against your board if the CPUs aren't getting
@@ -974,15 +988,12 @@ void __init setup_boot_APIC_clock (void)
*/
void __cpuinit check_boot_apic_timer_broadcast(void)
{
- struct clock_event_device *levt = &per_cpu(lapic_events, boot_cpu_id);
-
if (!disable_apic_timer ||
(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
return;
printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
- levt->features |= CLOCK_EVT_FEAT_DUMMY;
local_irq_enable();
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
@@ -1143,6 +1154,7 @@ asmlinkage void smp_spurious_interrupt(void)
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
ack_APIC_irq();
+ add_pda(irq_spurious_count, 1);
irq_exit();
}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 32f2365c26e..17089a04102 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -57,7 +57,7 @@
* screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
* 1.2a:Simple change to stop mysterious bug reports with SMP also added
* levels to the printk calls. APM is not defined for SMP machines.
- * The new replacment for it is, but Linux doesn't yet support this.
+ * The new replacement for it is, but Linux doesn't yet support this.
* Alan Cox Linux 2.1.55
* 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
* 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 8029742c0fc..0e45981b2dd 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -15,6 +15,7 @@
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <asm/bootparam.h>
#include <asm/elf.h>
#include <xen/interface/xen.h>
@@ -116,12 +117,14 @@ void foo(void)
#ifdef CONFIG_PARAVIRT
BLANK();
- OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
- OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
- OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
- OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
- OFFSET(PARAVIRT_iret, paravirt_ops, iret);
- OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif
#ifdef CONFIG_XEN
@@ -133,6 +136,7 @@ void foo(void)
#ifdef CONFIG_LGUEST_GUEST
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
+ OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc);
OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3);
@@ -144,4 +148,10 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
+
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 778953bc636..d1b6ed98774 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -15,12 +15,16 @@
#include <asm/segment.h>
#include <asm/thread_info.h>
#include <asm/ia32.h>
+#include <asm/bootparam.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define BLANK() asm volatile("\n->" : : )
+#define OFFSET(sym, str, mem) \
+ DEFINE(sym, offsetof(struct str, mem))
+
#define __NO_STUBS 1
#undef __SYSCALL
#undef _ASM_X86_64_UNISTD_H_
@@ -76,10 +80,44 @@ int main(void)
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
DEFINE(pbe_next, offsetof(struct pbe, next));
BLANK();
+#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
+ ENTRY(rbx);
+ ENTRY(rbx);
+ ENTRY(rcx);
+ ENTRY(rdx);
+ ENTRY(rsp);
+ ENTRY(rbp);
+ ENTRY(rsi);
+ ENTRY(rdi);
+ ENTRY(r8);
+ ENTRY(r9);
+ ENTRY(r10);
+ ENTRY(r11);
+ ENTRY(r12);
+ ENTRY(r13);
+ ENTRY(r14);
+ ENTRY(r15);
+ ENTRY(eflags);
+ BLANK();
+#undef ENTRY
+#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
+ ENTRY(cr0);
+ ENTRY(cr2);
+ ENTRY(cr3);
+ ENTRY(cr4);
+ ENTRY(cr8);
+ BLANK();
+#undef ENTRY
DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
BLANK();
DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
BLANK();
DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
return 0;
}
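
[Editorial note: the pt_regs_* and saved_context_* constants defined above are what the rewritten wakeup_64.S register save/restore code uses in place of hand-counted offsets. Below is a standalone sketch of the asm-offsets mechanism, reusing the DEFINE/OFFSET macros shown in the diff but with a hypothetical struct: compiling it with -S emits "->sym value" marker lines, which kbuild's sed rule turns into #defines visible to assembly.]

#include <stddef.h>

/* hypothetical stand-in for struct pt_regs */
struct pt_regs_example { unsigned long r15, r14, rbx; };

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))

int main(void)
{
	/* emits a marker line carrying the byte offset of rbx */
	OFFSET(pt_regs_rbx, pt_regs_example, rbx);
	return 0;
}
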
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 778396c78d6..cfdb2f3bd76 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -2,19 +2,19 @@
# Makefile for x86-compatible CPU details and quirks
#
-obj-y := common.o proc.o bugs.o
+obj-y := intel_cacheinfo.o addon_cpuid_features.o
-obj-y += amd.o
-obj-y += cyrix.o
-obj-y += centaur.o
-obj-y += transmeta.o
-obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o
-obj-y += nexgen.o
-obj-y += umc.o
+obj-$(CONFIG_X86_32) += common.o proc.o bugs.o
+obj-$(CONFIG_X86_32) += amd.o
+obj-$(CONFIG_X86_32) += cyrix.o
+obj-$(CONFIG_X86_32) += centaur.o
+obj-$(CONFIG_X86_32) += transmeta.o
+obj-$(CONFIG_X86_32) += intel.o
+obj-$(CONFIG_X86_32) += nexgen.o
+obj-$(CONFIG_X86_32) += umc.o
-obj-$(CONFIG_X86_MCE) += mcheck/
-
-obj-$(CONFIG_MTRR) += mtrr/
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
+obj-$(CONFIG_X86_MCE) += mcheck/
+obj-$(CONFIG_MTRR) += mtrr/
+obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index dcf6bbb1c7c..1ff88c7f45c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -4,6 +4,7 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#include <asm/mach_apic.h>
#include "cpu.h"
@@ -45,13 +46,17 @@ static __cpuinit int amd_apic_timer_broken(void)
case CPUID_XFAM_10H:
case CPUID_XFAM_11H:
rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
- if (lo & ENABLE_C1E_MASK)
+ if (lo & ENABLE_C1E_MASK) {
+ if (smp_processor_id() != boot_cpu_physical_apicid)
+ printk(KERN_INFO "AMD C1E detected late. "
+ " Force timer broadcast.\n");
return 1;
- break;
- default:
- /* err on the side of caution */
+ }
+ break;
+ default:
+ /* err on the side of caution */
return 1;
- }
+ }
return 0;
}
#endif
@@ -261,7 +266,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_HT
/*
* On a AMD multi core setup the lower bits of the APIC id
- * distingush the cores.
+ * distinguish the cores.
*/
if (c->x86_max_cores > 1) {
int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 473eac883c7..9681fa15ddf 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -53,7 +53,7 @@ static u32 __cpuinit ramtop(void) /* 16388 */
continue;
/*
* Don't MCR over reserved space. Ignore the ISA hole
- * we frob around that catastrophy already
+ * we frob around that catastrophe already
*/
if (e820.map[i].type == E820_RESERVED)
@@ -287,7 +287,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
c->x86_capability[5] = cpuid_edx(0xC0000001);
}
- /* Cyrix III family needs CX8 & PGE explicity enabled. */
+ /* Cyrix III family needs CX8 & PGE explicitly enabled. */
if (c->x86_model >=6 && c->x86_model <= 9) {
rdmsr (MSR_VIA_FCR, lo, hi);
lo |= (1<<1 | 1<<7);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d506201d397..e2fcf2051bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -207,7 +207,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
static int __init x86_fxsr_setup(char * s)
{
- /* Tell all the other CPU's to not use it... */
+ /* Tell all the other CPUs to not use it... */
disable_x86_fxsr = 1;
/*
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index d8c6f132dc7..151eda0a23f 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -19,6 +19,9 @@ config X86_ACPI_CPUFREQ
Processor Performance States.
This driver also supports Intel Enhanced Speedstep.
+ To compile this driver as a module, choose M here: the
+ module will be called acpi-cpufreq.
+
For details, take a look at <file:Documentation/cpu-freq/>.
If in doubt, say N.
@@ -26,7 +29,7 @@ config X86_ACPI_CPUFREQ
config ELAN_CPUFREQ
tristate "AMD Elan SC400 and SC410"
select CPU_FREQ_TABLE
- depends on X86_ELAN
+ depends on X86_32 && X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC400 and SC410
processors.
@@ -42,7 +45,7 @@ config ELAN_CPUFREQ
config SC520_CPUFREQ
tristate "AMD Elan SC520"
select CPU_FREQ_TABLE
- depends on X86_ELAN
+ depends on X86_32 && X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC520 processor.
@@ -54,6 +57,7 @@ config SC520_CPUFREQ
config X86_POWERNOW_K6
tristate "AMD Mobile K6-2/K6-3 PowerNow!"
select CPU_FREQ_TABLE
+ depends on X86_32
help
This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
AMD K6-3+ processors.
@@ -65,6 +69,7 @@ config X86_POWERNOW_K6
config X86_POWERNOW_K7
tristate "AMD Mobile Athlon/Duron PowerNow!"
select CPU_FREQ_TABLE
+ depends on X86_32
help
This adds the CPUFreq driver for mobile AMD K7 mobile processors.
@@ -76,23 +81,27 @@ config X86_POWERNOW_K7_ACPI
bool
depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
+ depends on X86_32
default y
config X86_POWERNOW_K8
tristate "AMD Opteron/Athlon64 PowerNow!"
select CPU_FREQ_TABLE
- depends on EXPERIMENTAL
help
This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
+ To compile this driver as a module, choose M here: the
+ module will be called powernow-k8.
+
For details, take a look at <file:Documentation/cpu-freq/>.
If in doubt, say N.
config X86_POWERNOW_K8_ACPI
- bool "ACPI Support"
- select ACPI_PROCESSOR
- depends on ACPI && X86_POWERNOW_K8
+ bool
+ prompt "ACPI Support" if X86_32
+ depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR
+ depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
default y
help
This provides access to the K8s Processor Performance States via ACPI.
@@ -104,7 +113,7 @@ config X86_POWERNOW_K8_ACPI
config X86_GX_SUSPMOD
tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
- depends on PCI
+ depends on X86_32 && PCI
help
This add the CPUFreq driver for NatSemi Geode processors which
support suspend modulation.
@@ -114,15 +123,20 @@ config X86_GX_SUSPMOD
If in doubt, say N.
config X86_SPEEDSTEP_CENTRINO
- tristate "Intel Enhanced SpeedStep"
+ tristate "Intel Enhanced SpeedStep (deprecated)"
select CPU_FREQ_TABLE
- select X86_SPEEDSTEP_CENTRINO_TABLE
+ select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32
+ depends on X86_32 || (X86_64 && ACPI_PROCESSOR)
help
+ This is deprecated and this functionality is now merged into
+ acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
+ speedstep_centrino.
This adds the CPUFreq driver for Enhanced SpeedStep enabled
- mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However,
- you also need to say Y to "Use ACPI tables to decode..." below
- [which might imply enabling ACPI] if you want to use this driver
- on non-Banias CPUs.
+ mobile CPUs. This means Intel Pentium M (Centrino) CPUs
+ or 64bit enabled Intel Xeons.
+
+ To compile this driver as a module, choose M here: the
+ module will be called speedstep-centrino.
For details, take a look at <file:Documentation/cpu-freq/>.
@@ -130,7 +144,7 @@ config X86_SPEEDSTEP_CENTRINO
config X86_SPEEDSTEP_CENTRINO_TABLE
bool "Built-in tables for Banias CPUs"
- depends on X86_SPEEDSTEP_CENTRINO
+ depends on X86_32 && X86_SPEEDSTEP_CENTRINO
default y
help
Use built-in tables for Banias CPUs if ACPI encoding
@@ -141,6 +155,7 @@ config X86_SPEEDSTEP_CENTRINO_TABLE
config X86_SPEEDSTEP_ICH
tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
select CPU_FREQ_TABLE
+ depends on X86_32
help
This adds the CPUFreq driver for certain mobile Intel Pentium III
(Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
@@ -154,7 +169,7 @@ config X86_SPEEDSTEP_ICH
config X86_SPEEDSTEP_SMI
tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
select CPU_FREQ_TABLE
- depends on EXPERIMENTAL
+ depends on X86_32 && EXPERIMENTAL
help
This adds the CPUFreq driver for certain mobile Intel Pentium III
(Coppermine), all mobile Intel Pentium III-M (Tualatin)
@@ -169,15 +184,24 @@ config X86_P4_CLOCKMOD
select CPU_FREQ_TABLE
help
This adds the CPUFreq driver for Intel Pentium 4 / XEON
- processors.
+ processors. When enabled it will lower CPU temperature by skipping
+ clocks.
+
+ This driver should be only used in exceptional
+ circumstances when very low power is needed because it causes severe
+ slowdowns and noticeable latencies. Normally Speedstep should be used
+ instead.
+
+ To compile this driver as a module, choose M here: the
+ module will be called p4-clockmod.
For details, take a look at <file:Documentation/cpu-freq/>.
- If in doubt, say N.
+ Unless you are absolutely sure say N.
config X86_CPUFREQ_NFORCE2
tristate "nVidia nForce2 FSB changing"
- depends on EXPERIMENTAL
+ depends on X86_32 && EXPERIMENTAL
help
This adds the CPUFreq driver for FSB changing on nVidia nForce2
platforms.
@@ -188,6 +212,7 @@ config X86_CPUFREQ_NFORCE2
config X86_LONGRUN
tristate "Transmeta LongRun"
+ depends on X86_32
help
This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
which support LongRun.
@@ -199,7 +224,7 @@ config X86_LONGRUN
config X86_LONGHAUL
tristate "VIA Cyrix III Longhaul"
select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
+ depends on X86_32 && ACPI_PROCESSOR
help
This adds the CPUFreq driver for VIA Samuel/CyrixIII,
VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
@@ -212,7 +237,7 @@ config X86_LONGHAUL
config X86_E_POWERSAVER
tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
select CPU_FREQ_TABLE
- depends on EXPERIMENTAL
+ depends on X86_32 && EXPERIMENTAL
help
This adds the CPUFreq driver for VIA C7 processors.
@@ -233,11 +258,11 @@ config X86_ACPI_CPUFREQ_PROC_INTF
config X86_SPEEDSTEP_LIB
tristate
- default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD
+ default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
config X86_SPEEDSTEP_RELAXED_CAP_CHECK
bool "Relaxed speedstep capability checks"
- depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
+ depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
help
Don't perform all checks for a speedstep capable system which would
normally be done. Some ancient or strange systems, though speedstep
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ffd01e5dcb5..fea0af0476b 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -77,7 +77,7 @@ static unsigned int acpi_pstate_strict;
static int check_est_cpu(unsigned int cpuid)
{
- struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
+ struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
if (cpu->x86_vendor != X86_VENDOR_INTEL ||
!cpu_has(cpu, X86_FEATURE_EST))
@@ -560,7 +560,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int cpu = policy->cpu;
struct acpi_cpufreq_data *data;
unsigned int result = 0;
- struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+ struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
struct acpi_processor_performance *perf;
dprintk("acpi_cpufreq_cpu_init\n");
@@ -595,7 +595,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
dmi_check_system(sw_any_bug_dmi_table);
if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
- policy->cpus = cpu_core_map[cpu];
+ policy->cpus = per_cpu(cpu_core_map, cpu);
}
#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 32f0bda3fc9..f03e9153618 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -260,7 +260,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
freqs.old = nforce2_get(policy->cpu);
freqs.new = target_fsb * fid * 100;
- freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */
+ freqs.cpu = 0; /* Only one CPU on nForce2 platforms */
if (freqs.old == freqs.new)
return 0;
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c11baaf9f2b..326a4c81f68 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -305,7 +305,7 @@ static struct cpufreq_driver eps_driver = {
static int __init eps_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
/* This driver will work only on Centaur C7 processors with
* Enhanced SpeedStep/PowerSaver registers */
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 1e7ae7dafcf..94619c22f56 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -199,7 +199,7 @@ static int elanfreq_target (struct cpufreq_policy *policy,
static int elanfreq_cpu_init(struct cpufreq_policy *policy)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
unsigned int i;
int result;
@@ -280,7 +280,7 @@ static struct cpufreq_driver elanfreq_driver = {
static int __init elanfreq_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
/* Test if we have the right hardware */
if ((c->x86_vendor != X86_VENDOR_AMD) ||
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ed2bda127c4..2ed7db2fd25 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -12,12 +12,12 @@
* of any nature resulting due to the use of this software. This
* software is provided AS-IS with no warranties.
*
- * Theoritical note:
+ * Theoretical note:
*
* (see Geode(tm) CS5530 manual (rev.4.1) page.56)
*
* CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
- * are based on Suspend Moduration.
+ * are based on Suspend Modulation.
*
* Suspend Modulation works by asserting and de-asserting the SUSP# pin
* to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
@@ -101,11 +101,11 @@
/* SUSCFG bits */
#define SUSMOD (1<<0) /* enable/disable suspend modulation */
-/* the belows support only with cs5530 (after rev.1.2)/cs5530A */
+/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */
#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */
/* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
-/* the belows support only with cs5530A */
+/* the below is supported only with cs5530A */
#define PWRSVE_ISA (1<<3) /* stop ISA clock */
#define PWRSVE (1<<4) /* active idle */
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 5045f5d583c..749d00cb2eb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -780,7 +780,7 @@ static int longhaul_setup_southbridge(void)
static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
char *cpuname=NULL;
int ret;
u32 lo, hi;
@@ -959,7 +959,7 @@ static struct cpufreq_driver longhaul_driver = {
static int __init longhaul_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index b2689514295..af4a867a097 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -172,7 +172,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
u32 save_lo, save_hi;
u32 eax, ebx, ecx, edx;
u32 try_hi;
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
if (!low_freq || !high_freq)
return -EINVAL;
@@ -298,7 +298,7 @@ static struct cpufreq_driver longrun_driver = {
*/
static int __init longrun_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
!cpu_has(c, X86_FEATURE_LONGRUN))
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 8eb414b906d..14791ec55cf 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -195,12 +195,12 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
{
- struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+ struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
int cpuid = 0;
unsigned int i;
#ifdef CONFIG_SMP
- policy->cpus = cpu_sibling_map[policy->cpu];
+ policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
#endif
/* Errata workaround */
@@ -279,7 +279,7 @@ static struct cpufreq_driver p4clockmod_driver = {
static int __init cpufreq_p4_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
int ret;
/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index 6d028533931..eb9b62b0830 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -1,6 +1,6 @@
/*
* This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
- * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski.
+ * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski.
*
* Licensed under the terms of the GNU GPL License version 2.
*
@@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = {
*/
static int __init powernow_k6_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
((c->x86_model != 12) && (c->x86_model != 13)))
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7decd6a50ff..b5a9863d6cd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -114,7 +114,7 @@ static int check_fsb(unsigned int fsbspeed)
static int check_powernow(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
unsigned int maxei, eax, ebx, ecx, edx;
if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
@@ -565,7 +565,7 @@ static unsigned int powernow_get(unsigned int cpu)
}
-static int __init acer_cpufreq_pst(struct dmi_system_id *d)
+static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
{
printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident);
printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index f105ffd0996..99e1ef9939b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -57,7 +57,7 @@ static struct powernow_k8_data *powernow_data[NR_CPUS];
static int cpu_family = CPU_OPTERON;
#ifndef CONFIG_SMP
-static cpumask_t cpu_core_map[1];
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
#endif
/* Return a frequency in MHz, given an input fid */
@@ -144,7 +144,7 @@ static void count_off_irt(struct powernow_k8_data *data)
return;
}
-/* the voltage stabalization time */
+/* the voltage stabilization time */
static void count_off_vst(struct powernow_k8_data *data)
{
udelay(data->vstable * VST_UNITS_20US);
@@ -643,7 +643,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
data->powernow_table = powernow_table;
- if (first_cpu(cpu_core_map[data->cpu]) == data->cpu)
+ if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
print_basics(data);
for (j = 0; j < data->numps; j++)
@@ -797,7 +797,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* fill in data */
data->numps = data->acpi_data.state_count;
- if (first_cpu(cpu_core_map[data->cpu]) == data->cpu)
+ if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
@@ -1171,7 +1171,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
if (cpu_family == CPU_HW_PSTATE)
pol->cpus = cpumask_of_cpu(pol->cpu);
else
- pol->cpus = cpu_core_map[pol->cpu];
+ pol->cpus = per_cpu(cpu_core_map, pol->cpu);
data->available_cores = &(pol->cpus);
/* Take a crude guess here.
@@ -1237,7 +1237,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
- data = powernow_data[first_cpu(cpu_core_map[cpu])];
+ data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))];
if (!data)
return -EINVAL;
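
(Editor's note: on !CONFIG_SMP builds powernow-k8 previously carried a private one-element cpu_core_map[] array; defining it with DEFINE_PER_CPU instead lets the rest of the file use the same per_cpu() spelling on UP and SMP alike. A small sketch of the idea, illustration only:

    #ifndef CONFIG_SMP
    DEFINE_PER_CPU(cpumask_t, cpu_core_map);
    #endif

    static int is_first_core(unsigned int cpu)
    {
        /* the identical expression compiles for both UP and SMP builds */
        return first_cpu(per_cpu(cpu_core_map, cpu)) == cpu;
    }
)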
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 8ae88f18128..afd2b520d35 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -140,10 +140,10 @@ struct powernow_k8_data {
#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */
-#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */
+#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */
/*
- * Most values of interest are enocoded in a single field of the _PSS
+ * Most values of interest are encoded in a single field of the _PSS
* entries: the "control" value.
*/
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index d9f3e90a7ae..42da9bd677d 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -102,7 +102,7 @@ static int sc520_freq_target (struct cpufreq_policy *policy,
static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
int result;
/* capability check */
@@ -151,7 +151,7 @@ static struct cpufreq_driver sc520_freq_driver = {
static int __init sc520_freq_init(void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
int err;
/* Test if we have the right hardware */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 811d4743854..3031f119619 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -230,7 +230,7 @@ static struct cpu_model models[] =
static int centrino_cpu_init_table(struct cpufreq_policy *policy)
{
- struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+ struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
struct cpu_model *model;
for(model = models; model->cpu_id != NULL; model++)
@@ -340,7 +340,7 @@ static unsigned int get_cur_freq(unsigned int cpu)
static int centrino_cpu_init(struct cpufreq_policy *policy)
{
- struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+ struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
unsigned freq;
unsigned l, h;
int ret;
@@ -612,7 +612,7 @@ static struct cpufreq_driver centrino_driver = {
*/
static int __init centrino_init(void)
{
- struct cpuinfo_x86 *cpu = cpu_data;
+ struct cpuinfo_x86 *cpu = &cpu_data(0);
if (!cpu_has(cpu, X86_FEATURE_EST))
return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 36685e8f7be..14d68aa301e 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
/* only run on CPU to be set, or on its sibling */
#ifdef CONFIG_SMP
- policy->cpus = cpu_sibling_map[policy->cpu];
+ policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
#endif
cpus_allowed = current->cpus_allowed;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index b1acc8ce316..76c3ab0da46 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
unsigned int speedstep_detect_processor (void)
{
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(0);
u32 ebx, msr_lo, msr_hi;
dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 122d2d75aa9..88d66fb8411 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -93,7 +93,7 @@ static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
ccr5 = getCx86(CX86_CCR5);
if (ccr5 & 2)
setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */
@@ -115,9 +115,9 @@ static void __cpuinit set_cx86_reorder(void)
printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- /* Load/Store Serialize to mem access disable (=reorder it)  */
+ /* Load/Store Serialize to mem access disable (=reorder it) */
setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
/* set load/store serialize from 1GB to 4GB */
ccr3 |= 0xe0;
@@ -146,7 +146,7 @@ static void __cpuinit set_cx86_inc(void)
printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* PCR1 -- Performance Control */
/* Incrementor on, whatever that is */
setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
@@ -256,7 +256,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
u32 vendor, device;
/* It isn't really a PCI quirk directly, but the cure is the
same. The MediaGX has deep magic SMM stuff that handles the
- SB emulation. It thows away the fifo on disable_dma() which
+ SB emulation. It throws away the fifo on disable_dma() which
is wrong and ruins the audio.
Bug2: VSA1 has a wrap bug so that using maximum sized DMA
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index dc4e08147b1..cc8c501b9f3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <asm/processor.h>
+#include <asm/pgtable.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
@@ -19,8 +20,6 @@
#include <mach_apic.h>
#endif
-extern int trap_init_f00f_bug(void);
-
#ifdef CONFIG_X86_INTEL_USERCOPY
/*
* Alignment at which movsl is preferred for bulk memory copies.
@@ -95,6 +94,20 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
+#ifdef CONFIG_X86_F00F_BUG
+static void __cpuinit trap_init_f00f_bug(void)
+{
+ __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+
+ /*
+ * Update the IDT descriptor and reload the IDT so that
+ * it uses the read-only mapped virtual address.
+ */
+ idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+ load_idt(&idt_descr);
+}
+#endif
+
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index db6c25aa577..9921b01fe19 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -170,15 +170,15 @@ union l3_cache {
unsigned val;
};
-static const unsigned short assocs[] = {
+static unsigned short assocs[] __cpuinitdata = {
[1] = 1, [2] = 2, [4] = 4, [6] = 8,
[8] = 16, [0xa] = 32, [0xb] = 48,
[0xc] = 64,
[0xf] = 0xffff // ??
};
-static const unsigned char levels[] = { 1, 1, 2, 3 };
-static const unsigned char types[] = { 1, 2, 3, 3 };
+static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
+static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
union _cpuid4_leaf_ebx *ebx,
@@ -295,7 +295,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
- unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+ unsigned int cpu = c->cpu_index;
#endif
if (c->cpuid_level > 3) {
@@ -417,14 +417,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
if (new_l2) {
l2 = new_l2;
#ifdef CONFIG_X86_HT
- cpu_llc_id[cpu] = l2_id;
+ per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
}
if (new_l3) {
l3 = new_l3;
#ifdef CONFIG_X86_HT
- cpu_llc_id[cpu] = l3_id;
+ per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
}
@@ -459,7 +459,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct _cpuid4_info *this_leaf, *sibling_leaf;
unsigned long num_threads_sharing;
int index_msb, i;
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -470,8 +470,8 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
index_msb = get_count_order(num_threads_sharing);
for_each_online_cpu(i) {
- if (c[i].apicid >> index_msb ==
- c[cpu].apicid >> index_msb) {
+ if (cpu_data(i).apicid >> index_msb ==
+ c->apicid >> index_msb) {
cpu_set(i, this_leaf->shared_cpu_map);
if (i != cpu && cpuid4_info[i]) {
sibling_leaf = CPUID4_INFO_IDX(i, index);
@@ -493,12 +493,17 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
}
}
#else
-static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
-static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
#endif
static void free_cache_attributes(unsigned int cpu)
{
+ int i;
+
+ for (i = 0; i < num_cache_leaves; i++)
+ cache_remove_shared_cpu_map(cpu, i);
+
kfree(cpuid4_info[cpu]);
cpuid4_info[cpu] = NULL;
}
@@ -506,8 +511,8 @@ static void free_cache_attributes(unsigned int cpu)
static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
+ unsigned long j;
+ int retval;
cpumask_t oldmask;
if (num_cache_leaves == 0)
@@ -524,19 +529,26 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
goto out;
/* Do cpuid and store the results */
- retval = 0;
for (j = 0; j < num_cache_leaves; j++) {
this_leaf = CPUID4_INFO_IDX(cpu, j);
retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0))
+ if (unlikely(retval < 0)) {
+ int i;
+
+ for (i = 0; i < j; i++)
+ cache_remove_shared_cpu_map(cpu, i);
break;
+ }
cache_shared_cpu_map_setup(cpu, j);
}
set_cpus_allowed(current, oldmask);
out:
- if (retval)
- free_cache_attributes(cpu);
+ if (retval) {
+ kfree(cpuid4_info[cpu]);
+ cpuid4_info[cpu] = NULL;
+ }
+
return retval;
}
@@ -669,7 +681,7 @@ static struct kobj_type ktype_percpu_entry = {
.sysfs_ops = &sysfs_ops,
};
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
kfree(cache_kobject[cpu]);
kfree(index_kobject[cpu]);
@@ -680,13 +692,14 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu)
static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
+ int err;
if (num_cache_leaves == 0)
return -ENOENT;
- detect_cache_attributes(cpu);
- if (cpuid4_info[cpu] == NULL)
- return -ENOENT;
+ err = detect_cache_attributes(cpu);
+ if (err)
+ return err;
/* Allocate all required memory */
cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -705,13 +718,15 @@ err_out:
return -ENOMEM;
}
+static cpumask_t cache_dev_map = CPU_MASK_NONE;
+
/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i, j;
struct _index_kobject *this_object;
- int retval = 0;
+ int retval;
retval = cpuid4_cache_sysfs_init(cpu);
if (unlikely(retval < 0))
@@ -721,6 +736,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
kobject_set_name(cache_kobject[cpu], "%s", "cache");
cache_kobject[cpu]->ktype = &ktype_percpu_entry;
retval = kobject_register(cache_kobject[cpu]);
+ if (retval < 0) {
+ cpuid4_cache_sysfs_exit(cpu);
+ return retval;
+ }
for (i = 0; i < num_cache_leaves; i++) {
this_object = INDEX_KOBJECT_PTR(cpu,i);
@@ -740,6 +759,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
break;
}
}
+ if (!retval)
+ cpu_set(cpu, cache_dev_map);
+
return retval;
}
@@ -750,13 +772,14 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
if (cpuid4_info[cpu] == NULL)
return;
- for (i = 0; i < num_cache_leaves; i++) {
- cache_remove_shared_cpu_map(cpu, i);
+ if (!cpu_isset(cpu, cache_dev_map))
+ return;
+ cpu_clear(cpu, cache_dev_map);
+
+ for (i = 0; i < num_cache_leaves; i++)
kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
- }
kobject_unregister(cache_kobject[cpu]);
cpuid4_cache_sysfs_exit(cpu);
- return;
}
static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
@@ -781,7 +804,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
{
- .notifier_call = cacheinfo_cpu_callback,
+ .notifier_call = cacheinfo_cpu_callback,
};
static int __cpuinit cache_sysfs_init(void)
@@ -791,13 +814,15 @@ static int __cpuinit cache_sysfs_init(void)
if (num_cache_leaves == 0)
return 0;
- register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
for_each_online_cpu(i) {
- cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
- (void *)(long)i);
- }
+ int err;
+ struct sys_device *sys_dev = get_cpu_sysdev(i);
+ err = cache_add_dev(sys_dev);
+ if (err)
+ return err;
+ }
+ register_hotcpu_notifier(&cacheinfo_cpu_notifier);
return 0;
}
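
(Editor's note: the cache_dev_map mask introduced above implements a common hotplug idiom — remember which CPUs actually completed sysfs registration so the remove path only undoes work that was done. A minimal sketch with hypothetical do_register()/do_unregister() helpers:

    static cpumask_t dev_map = CPU_MASK_NONE;

    static int add_dev(unsigned int cpu)
    {
        int err = do_register(cpu);     /* hypothetical helper */

        if (!err)
            cpu_set(cpu, dev_map);
        return err;
    }

    static void remove_dev(unsigned int cpu)
    {
        if (!cpu_isset(cpu, dev_map))
            return;                     /* registration never happened */
        cpu_clear(cpu, dev_map);
        do_unregister(cpu);             /* hypothetical helper */
    }
)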
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index f1ebe1c1c17..d7d2323bbb6 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,2 +1,6 @@
-obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
-obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
+obj-y = mce_$(BITS).o therm_throt.o
+
+obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
+obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
+obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
+obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 34c781eddee..34c781eddee 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index a66d607f5b9..447b351f1f2 100644
--- a/arch/x86/kernel/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -1,8 +1,8 @@
/*
* Machine check handler.
* K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
- * Rest from unknown author(s).
- * 2004 Andi Kleen. Rewrote most of it.
+ * Rest from unknown author(s).
+ * 2004 Andi Kleen. Rewrote most of it.
*/
#include <linux/init.h>
@@ -23,7 +23,7 @@
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/kdebug.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/uaccess.h>
@@ -63,10 +63,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
* separate MCEs from kernel messages to avoid bogus bug reports.
*/
-struct mce_log mcelog = {
+struct mce_log mcelog = {
MCE_LOG_SIGNATURE,
MCE_LOG_LEN,
-};
+};
void mce_log(struct mce *mce)
{
@@ -76,9 +76,6 @@ void mce_log(struct mce *mce)
wmb();
for (;;) {
entry = rcu_dereference(mcelog.next);
- /* The rmb forces the compiler to reload next in each
- iteration */
- rmb();
for (;;) {
/* When the buffer fills up discard new entries. Assume
that the earlier errors are the more interesting. */
@@ -114,42 +111,42 @@ static void print_mce(struct mce *m)
"CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->cpu, m->mcgstatus, m->bank, m->status);
if (m->rip) {
- printk(KERN_EMERG
- "RIP%s %02x:<%016Lx> ",
+ printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->rip);
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->rip);
printk("\n");
}
- printk(KERN_EMERG "TSC %Lx ", m->tsc);
+ printk(KERN_EMERG "TSC %Lx ", m->tsc);
if (m->addr)
printk("ADDR %Lx ", m->addr);
if (m->misc)
- printk("MISC %Lx ", m->misc);
+ printk("MISC %Lx ", m->misc);
printk("\n");
printk(KERN_EMERG "This is not a software problem!\n");
- printk(KERN_EMERG
- "Run through mcelog --ascii to decode and contact your hardware vendor\n");
+ printk(KERN_EMERG "Run through mcelog --ascii to decode "
+ "and contact your hardware vendor\n");
}
static void mce_panic(char *msg, struct mce *backup, unsigned long start)
-{
+{
int i;
oops_begin();
for (i = 0; i < MCE_LOG_LEN; i++) {
unsigned long tsc = mcelog.entry[i].tsc;
+
if (time_before(tsc, start))
continue;
- print_mce(&mcelog.entry[i]);
+ print_mce(&mcelog.entry[i]);
if (backup && mcelog.entry[i].tsc == backup->tsc)
backup = NULL;
}
if (backup)
print_mce(backup);
panic(msg);
-}
+}
static int mce_available(struct cpuinfo_x86 *c)
{
@@ -173,10 +170,9 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
}
}
-/*
+/*
* The actual machine check handler
*/
-
void do_machine_check(struct pt_regs * regs, long error_code)
{
struct mce m, panicm;
@@ -197,7 +193,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
atomic_inc(&mce_entry);
if (regs)
- notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
+ notify_die(DIE_NMI, "machine check", regs, error_code, 18,
+ SIGKILL);
if (!banks)
goto out2;
@@ -207,15 +204,15 @@ void do_machine_check(struct pt_regs * regs, long error_code)
/* if the restart IP is not valid, we're done for */
if (!(m.mcgstatus & MCG_STATUS_RIPV))
no_way_out = 1;
-
+
rdtscll(mcestart);
barrier();
for (i = 0; i < banks; i++) {
if (!bank[i])
continue;
-
- m.misc = 0;
+
+ m.misc = 0;
m.addr = 0;
m.bank = i;
m.tsc = 0;
@@ -323,7 +320,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
#ifdef CONFIG_X86_MCE_INTEL
/***
* mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occured.
+ * @cpu: The CPU on which the event occurred.
* @status: Event status information
*
* This function should be called by the thermal interrupt after the
@@ -375,7 +372,7 @@ static void mcheck_timer(struct work_struct *work)
if (mce_notify_user()) {
next_interval = max(next_interval/2, HZ/100);
} else {
- next_interval = min(next_interval*2,
+ next_interval = min(next_interval * 2,
(int)round_jiffies_relative(check_interval*HZ));
}
@@ -426,18 +423,18 @@ static struct notifier_block mce_idle_notifier = {
};
static __init int periodic_mcheck_init(void)
-{
+{
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
round_jiffies_relative(next_interval));
idle_notifier_register(&mce_idle_notifier);
return 0;
-}
+}
__initcall(periodic_mcheck_init);
-/*
+/*
* Initialize Machine Checks for a CPU.
*/
static void mce_init(void *dummy)
@@ -447,9 +444,9 @@ static void mce_init(void *dummy)
rdmsrl(MSR_IA32_MCG_CAP, cap);
banks = cap & 0xff;
- if (banks > NR_BANKS) {
+ if (banks > NR_BANKS) {
printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
- banks = NR_BANKS;
+ banks = NR_BANKS;
}
/* Use accurate RIP reporting if available. */
if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -467,15 +464,15 @@ static void mce_init(void *dummy)
for (i = 0; i < banks; i++) {
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
- }
+ }
}
/* Add per CPU specific workarounds here */
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
-{
+{
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
- /* disable GART TBL walk error reporting, which trips off
+ /* disable GART TBL walk error reporting, which trips off
incorrectly with the IOMMU & 3ware & Cerberus. */
clear_bit(10, &bank[4]);
/* Lots of broken BIOS around that don't clear them
@@ -483,7 +480,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
mce_bootlog = 0;
}
-}
+}
static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
{
@@ -499,15 +496,15 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
}
}
-/*
+/*
* Called for each booted CPU to set up machine checks.
- * Must be called with preempt off.
+ * Must be called with preempt off.
*/
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
static cpumask_t mce_cpus = CPU_MASK_NONE;
- mce_cpu_quirks(c);
+ mce_cpu_quirks(c);
if (mce_dont_init ||
cpu_test_and_set(smp_processor_id(), mce_cpus) ||
@@ -556,13 +553,15 @@ static int mce_release(struct inode *inode, struct file *file)
return 0;
}
-static void collect_tscs(void *data)
-{
+static void collect_tscs(void *data)
+{
unsigned long *cpu_tsc = (unsigned long *)data;
+
rdtscll(cpu_tsc[smp_processor_id()]);
-}
+}
-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
+static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
+ loff_t *off)
{
unsigned long *cpu_tsc;
static DECLARE_MUTEX(mce_read_sem);
@@ -574,19 +573,20 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
if (!cpu_tsc)
return -ENOMEM;
- down(&mce_read_sem);
+ down(&mce_read_sem);
next = rcu_dereference(mcelog.next);
/* Only supports full reads right now */
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
+ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
up(&mce_read_sem);
kfree(cpu_tsc);
return -EINVAL;
}
err = 0;
- for (i = 0; i < next; i++) {
+ for (i = 0; i < next; i++) {
unsigned long start = jiffies;
+
while (!mcelog.entry[i].finished) {
if (time_after_eq(jiffies, start + 2)) {
memset(mcelog.entry + i,0, sizeof(struct mce));
@@ -596,31 +596,34 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
}
smp_rmb();
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
- buf += sizeof(struct mce);
+ buf += sizeof(struct mce);
timeout:
;
- }
+ }
memset(mcelog.entry, 0, next * sizeof(struct mce));
mcelog.next = 0;
synchronize_sched();
- /* Collect entries that were still getting written before the synchronize. */
-
+ /*
+ * Collect entries that were still getting written before the
+ * synchronize.
+ */
on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
- for (i = next; i < MCE_LOG_LEN; i++) {
- if (mcelog.entry[i].finished &&
- mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
- err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
+ for (i = next; i < MCE_LOG_LEN; i++) {
+ if (mcelog.entry[i].finished &&
+ mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
+ err |= copy_to_user(buf, mcelog.entry+i,
+ sizeof(struct mce));
smp_rmb();
buf += sizeof(struct mce);
memset(&mcelog.entry[i], 0, sizeof(struct mce));
}
- }
+ }
up(&mce_read_sem);
kfree(cpu_tsc);
- return err ? -EFAULT : buf - ubuf;
+ return err ? -EFAULT : buf - ubuf;
}
static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -631,26 +634,29 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
return 0;
}
-static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
+static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd,
+ unsigned long arg)
{
int __user *p = (int __user *)arg;
+
if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -EPERM;
switch (cmd) {
- case MCE_GET_RECORD_LEN:
+ case MCE_GET_RECORD_LEN:
return put_user(sizeof(struct mce), p);
case MCE_GET_LOG_LEN:
- return put_user(MCE_LOG_LEN, p);
+ return put_user(MCE_LOG_LEN, p);
case MCE_GETCLEAR_FLAGS: {
unsigned flags;
- do {
+
+ do {
flags = mcelog.flags;
- } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
- return put_user(flags, p);
+ } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+ return put_user(flags, p);
}
default:
- return -ENOTTY;
- }
+ return -ENOTTY;
+ }
}
static const struct file_operations mce_chrdev_ops = {
@@ -681,25 +687,22 @@ void __init restart_mce(void)
set_in_cr4(X86_CR4_MCE);
}
-/*
- * Old style boot options parsing. Only for compatibility.
+/*
+ * Old style boot options parsing. Only for compatibility.
*/
-
static int __init mcheck_disable(char *str)
{
mce_dont_init = 1;
return 1;
}
-/* mce=off disables machine check. Note you can reenable it later
+/* mce=off disables machine check. Note you can re-enable it later
using sysfs.
mce=TOLERANCELEVEL (number, see above)
mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
mce=nobootlog Don't log MCEs from before booting. */
static int __init mcheck_enable(char *str)
{
- if (*str == '=')
- str++;
if (!strcmp(str, "off"))
mce_dont_init = 1;
else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
@@ -707,16 +710,16 @@ static int __init mcheck_enable(char *str)
else if (isdigit(str[0]))
get_option(&str, &tolerant);
else
- printk("mce= argument %s ignored. Please use /sys", str);
+ printk("mce= argument %s ignored. Please use /sys", str);
return 1;
}
__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
+__setup("mce=", mcheck_enable);
-/*
+/*
* Sysfs support
- */
+ */
/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
Only one CPU is active at this time, the others get readded later using
@@ -728,12 +731,12 @@ static int mce_resume(struct sys_device *dev)
}
/* Reinit MCEs after user configuration changes */
-static void mce_restart(void)
-{
+static void mce_restart(void)
+{
if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
- on_each_cpu(mce_init, NULL, 1, 1);
+ on_each_cpu(mce_init, NULL, 1, 1);
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
@@ -749,17 +752,17 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
- static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
- return sprintf(buf, "%lx\n", (unsigned long)var); \
- } \
+ static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
+ return sprintf(buf, "%lx\n", (unsigned long)var); \
+ } \
static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
- char *end; \
- unsigned long new = simple_strtoul(buf, &end, 0); \
- if (end == buf) return -EINVAL; \
- var = new; \
- start; \
- return end-buf; \
- } \
+ char *end; \
+ unsigned long new = simple_strtoul(buf, &end, 0); \
+ if (end == buf) return -EINVAL; \
+ var = new; \
+ start; \
+ return end-buf; \
+ } \
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
/* TBD should generate these dynamically based on number of available banks */
@@ -799,24 +802,41 @@ static struct sysdev_attribute *mce_attributes[] = {
NULL
};
+static cpumask_t mce_device_initialized = CPU_MASK_NONE;
+
/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
static __cpuinit int mce_create_device(unsigned int cpu)
{
int err;
int i;
- if (!mce_available(&cpu_data[cpu]))
+
+ if (!mce_available(&cpu_data(cpu)))
return -EIO;
+ memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
per_cpu(device_mce,cpu).id = cpu;
per_cpu(device_mce,cpu).cls = &mce_sysclass;
err = sysdev_register(&per_cpu(device_mce,cpu));
+ if (err)
+ return err;
+
+ for (i = 0; mce_attributes[i]; i++) {
+ err = sysdev_create_file(&per_cpu(device_mce,cpu),
+ mce_attributes[i]);
+ if (err)
+ goto error;
+ }
+ cpu_set(cpu, mce_device_initialized);
- if (!err) {
- for (i = 0; mce_attributes[i]; i++)
- sysdev_create_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
+ return 0;
+error:
+ while (i--) {
+ sysdev_remove_file(&per_cpu(device_mce,cpu),
+ mce_attributes[i]);
}
+ sysdev_unregister(&per_cpu(device_mce,cpu));
+
return err;
}
@@ -824,11 +844,14 @@ static void mce_remove_device(unsigned int cpu)
{
int i;
+ if (!cpu_isset(cpu, mce_device_initialized))
+ return;
+
for (i = 0; mce_attributes[i]; i++)
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
sysdev_unregister(&per_cpu(device_mce,cpu));
- memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
+ cpu_clear(cpu, mce_device_initialized);
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -862,9 +885,13 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
err = sysdev_class_register(&mce_sysclass);
+ if (err)
+ return err;
for_each_online_cpu(i) {
- mce_create_device(i);
+ err = mce_create_device(i);
+ if (err)
+ return err;
}
register_hotcpu_notifier(&mce_cpu_notifier);
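
(Editor's note: mce_create_device() now unwinds on partial failure instead of ignoring sysdev_create_file() errors. A sketch of that pattern in isolation — the helper name is illustrative, the sysdev calls are the 2.6-era API used above:

    static int create_attrs(struct sys_device *dev,
                            struct sysdev_attribute *attrs[])
    {
        int i, err;

        for (i = 0; attrs[i]; i++) {
            err = sysdev_create_file(dev, attrs[i]);
            if (err)
                goto undo;
        }
        return 0;
    undo:
        while (i--)                     /* remove only what was created */
            sysdev_remove_file(dev, attrs[i]);
        return err;
    }
)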
diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 2f8a7f18b0f..752fb16a817 100644
--- a/arch/x86/kernel/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,6 +237,7 @@ asmlinkage void mce_threshold_interrupt(void)
}
}
out:
+ add_pda(irq_threshold_count, 1);
irq_exit();
}
@@ -471,11 +472,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
- if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */
- i = first_cpu(cpu_core_map[cpu]);
+ if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
+ i = first_cpu(per_cpu(cpu_core_map, cpu));
/* first core not up yet */
- if (cpu_data[i].cpu_core_id)
+ if (cpu_data(i).cpu_core_id)
goto out;
/* already linked */
@@ -492,7 +493,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- b->cpus = cpu_core_map[cpu];
+ b->cpus = per_cpu(cpu_core_map, cpu);
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
}
@@ -509,7 +510,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifndef CONFIG_SMP
b->cpus = CPU_MASK_ALL;
#else
- b->cpus = cpu_core_map[cpu];
+ b->cpus = per_cpu(cpu_core_map, cpu);
#endif
err = kobject_register(&b->kobj);
if (err)
diff --git a/arch/x86/kernel/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 6551505d8a2..c17eaf5dd6d 100644
--- a/arch/x86/kernel/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,6 +26,7 @@ asmlinkage void smp_thermal_interrupt(void)
if (therm_throt_process(msr_val & 1))
mce_log_therm_throt_event(smp_processor_id(), msr_val);
+ add_pda(irq_thermal_count, 1);
irq_exit();
}
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 1509edfb231..be4dabfee1f 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -61,6 +61,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
{
irq_enter();
vendor_thermal_interrupt(regs);
+ __get_cpu_var(irq_stat).irq_thermal_count++;
irq_exit();
}
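
(Editor's note: the three hunks above add interrupt accounting so thermal/threshold interrupts show up in the per-CPU statistics — on x86_64 the counter lives in the PDA, on i386 in irq_stat. A condensed sketch of the two spellings, wrapped in a hypothetical helper for illustration:

    static inline void account_thermal_irq(void)
    {
    #ifdef CONFIG_X86_64
        add_pda(irq_thermal_count, 1);                  /* 64-bit: per-CPU PDA */
    #else
        __get_cpu_var(irq_stat).irq_thermal_count++;    /* 32-bit: irq_stat */
    #endif
    }
)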
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1203dc5ab87..24885be5c48 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -131,17 +131,19 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
- int err;
+ int err = 0;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev);
mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
mutex_lock(&therm_cpu_lock);
@@ -149,10 +151,10 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
mutex_unlock(&therm_cpu_lock);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
-static struct notifier_block thermal_throttle_cpu_notifier =
+static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
{
.notifier_call = thermal_throttle_cpu_callback,
};
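
(Editor's note: switching the callback from CPU_ONLINE to CPU_UP_PREPARE, and returning NOTIFY_BAD when device creation fails, lets a failed allocation veto the CPU before it comes online; CPU_UP_CANCELED then undoes the preparation. A hedged skeleton of that convention, with illustrative helpers:

    static int __cpuinit cpu_cb(struct notifier_block *nb,
                                unsigned long action, void *hcpu)
    {
        unsigned int cpu = (unsigned long)hcpu;
        int err = 0;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
            err = add_cpu_dev(cpu);     /* hypothetical helper */
            break;
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
            remove_cpu_dev(cpu);        /* hypothetical helper */
            break;
        }
        return err ? NOTIFY_BAD : NOTIFY_OK;
    }
)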
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 2287d4863a8..9964be3de2b 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -147,10 +147,10 @@ static void prepare_set(void)
write_cr0(cr0);
wbinvd();
- /* Cyrix ARRs - everything else were excluded at the top */
+ /* Cyrix ARRs - everything else was excluded at the top */
ccr3 = getCx86(CX86_CCR3);
- /* Cyrix ARRs - everything else were excluded at the top */
+ /* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 56f64e34829..992f08dfbb6 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -182,7 +182,7 @@ static inline void k8_enable_fixed_iorrs(void)
/**
* Checks and updates an fixed-range MTRR if it differs from the value it
- * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also.
+ * should have. If K8 extensions are wanted, update the K8 SYSCFG MSR also.
* see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
* \param msr MSR address of the MTTR which should be checked and updated
* \param changed pointer which indicates whether the MTRR needed to be changed
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c48b6fea5ab..3b20613325d 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -139,13 +139,12 @@ struct set_mtrr_data {
mtrr_type smp_type;
};
-#ifdef CONFIG_SMP
-
static void ipi_handler(void *info)
/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
[RETURNS] Nothing.
*/
{
+#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
unsigned long flags;
@@ -168,9 +167,8 @@ static void ipi_handler(void *info)
atomic_dec(&data->count);
local_irq_restore(flags);
-}
-
#endif
+}
static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
return type1 == MTRR_TYPE_UNCACHABLE ||
@@ -738,13 +736,7 @@ void mtrr_ap_init(void)
*/
void mtrr_save_state(void)
{
- int cpu = get_cpu();
-
- if (cpu == 0)
- mtrr_save_fixed_ranges(NULL);
- else
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
- put_cpu();
+ smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
}
static int __init mtrr_init_finialize(void)
@@ -754,7 +746,7 @@ static int __init mtrr_init_finialize(void)
if (use_intel())
mtrr_state_warn();
else {
- /* The CPUs haven't MTRR and seemes not support SMP. They have
+ /* The CPUs have no MTRR and seem not to support SMP. They have
* specific drivers, we use a tricky method to support
* suspend/resume for them.
* TBD: is there any system with such CPU which supports
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 93fecd4b03d..c02541e6e65 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -34,7 +34,7 @@ struct wd_ops {
u64 checkbit;
};
-static struct wd_ops *wd_ops;
+static const struct wd_ops *wd_ops;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
@@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr)
unsigned int counter;
counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return 1;
if (!test_and_set_bit(counter, perfctr_nmi_owner))
return 1;
@@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int msr)
unsigned int counter;
counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return;
clear_bit(counter, perfctr_nmi_owner);
}
@@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr)
unsigned int counter;
counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return 1;
if (!test_and_set_bit(counter, evntsel_nmi_owner))
return 1;
@@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int msr)
unsigned int counter;
counter = nmi_evntsel_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+ /* register not managed by the allocator? */
+ if (counter > NMI_MAX_COUNTER_BITS)
+ return;
clear_bit(counter, evntsel_nmi_owner);
}
@@ -317,7 +325,7 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}
-static struct wd_ops k7_wd_ops = {
+static const struct wd_ops k7_wd_ops = {
.reserve = single_msr_reserve,
.unreserve = single_msr_unreserve,
.setup = setup_k7_watchdog,
@@ -380,7 +388,7 @@ static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
}
-static struct wd_ops p6_wd_ops = {
+static const struct wd_ops p6_wd_ops = {
.reserve = single_msr_reserve,
.unreserve = single_msr_unreserve,
.setup = setup_p6_watchdog,
@@ -532,7 +540,7 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}
-static struct wd_ops p4_wd_ops = {
+static const struct wd_ops p4_wd_ops = {
.reserve = p4_reserve,
.unreserve = p4_unreserve,
.setup = setup_p4_watchdog,
@@ -550,6 +558,8 @@ static struct wd_ops p4_wd_ops = {
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+static struct wd_ops intel_arch_wd_ops;
+
static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
unsigned int ebx;
@@ -591,11 +601,11 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
wd->perfctr_msr = perfctr_msr;
wd->evntsel_msr = evntsel_msr;
wd->cccr_msr = 0; //unused
- wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1);
+ intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
return 1;
}
-static struct wd_ops intel_arch_wd_ops = {
+static struct wd_ops intel_arch_wd_ops __read_mostly = {
.reserve = single_msr_reserve,
.unreserve = single_msr_unreserve,
.setup = setup_intel_arch_watchdog,
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 1e31b6caffb..066f8c6af4d 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -49,7 +49,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* Intel-defined (#2) */
"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
+ NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* VIA/Cyrix/Centaur-defined */
@@ -59,10 +59,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
- "altmovcr8", "abm", "sse4a",
- "misalignsse", "3dnowprefetch",
- "osvw", "ibs", NULL, NULL, NULL, NULL,
+ "lahf_lm", "cmp_legacy", "svm", "extapic",
+ "cr8_legacy", "abm", "sse4a", "misalignsse",
+ "3dnowprefetch", "osvw", "ibs", "sse5",
+ "skinit", "wdt", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -85,12 +85,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* nothing */
};
struct cpuinfo_x86 *c = v;
- int i, n = c - cpu_data;
+ int i, n = 0;
int fpu_exception;
#ifdef CONFIG_SMP
if (!cpu_online(n))
return 0;
+ n = c->cpu_index;
#endif
seq_printf(m, "processor\t: %d\n"
"vendor_id\t: %s\n"
@@ -122,7 +123,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#ifdef CONFIG_X86_HT
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
+ seq_printf(m, "siblings\t: %d\n",
+ cpus_weight(per_cpu(cpu_core_map, n)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
}
@@ -174,11 +176,15 @@ static int show_cpuinfo(struct seq_file *m, void *v)
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+ if (*pos == 0) /* just in case, cpu 0 is not the first */
+ *pos = first_cpu(cpu_possible_map);
+ if ((*pos) < NR_CPUS && cpu_possible(*pos))
+ return &cpu_data(*pos);
+ return NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
- ++*pos;
+ *pos = next_cpu(*pos, cpu_possible_map);
return c_start(m, pos);
}
static void c_stop(struct seq_file *m, void *v)
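
(Editor's note: show_cpuinfo() can no longer recover the CPU number with "c - cpu_data" pointer arithmetic once the entries stop being a contiguous array, which is why the per-CPU conversion records the index in struct cpuinfo_x86 itself. A sketch of the lookup these hunks assume; the field is only present on SMP builds:

    static unsigned int cpuinfo_index(struct cpuinfo_x86 *c)
    {
    #ifdef CONFIG_SMP
        return c->cpu_index;    /* set when the per-CPU cpuinfo is filled in */
    #else
        return 0;               /* UP: only the boot CPU exists */
    #endif
    }
)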
diff --git a/arch/x86/kernel/cpufreq/Kconfig b/arch/x86/kernel/cpufreq/Kconfig
deleted file mode 100644
index a3fd51926cb..00000000000
--- a/arch/x86/kernel/cpufreq/Kconfig
+++ /dev/null
@@ -1,108 +0,0 @@
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config X86_POWERNOW_K8
- tristate "AMD Opteron/Athlon64 PowerNow!"
- select CPU_FREQ_TABLE
- help
- This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors.
-
- To compile this driver as a module, choose M here: the
- module will be called powernow-k8.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_POWERNOW_K8_ACPI
- bool
- depends on X86_POWERNOW_K8 && ACPI_PROCESSOR
- depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
- default y
-
-config X86_SPEEDSTEP_CENTRINO
- tristate "Intel Enhanced SpeedStep (deprecated)"
- select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
- help
- This is deprecated and this functionality is now merged into
- acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
- speedstep_centrino.
- This adds the CPUFreq driver for Enhanced SpeedStep enabled
- mobile CPUs. This means Intel Pentium M (Centrino) CPUs
- or 64bit enabled Intel Xeons.
-
- To compile this driver as a module, choose M here: the
- module will be called speedstep-centrino.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-config X86_ACPI_CPUFREQ
- tristate "ACPI Processor P-States driver"
- select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
- help
- This driver adds a CPUFreq driver which utilizes the ACPI
- Processor Performance States.
- This driver also supports Intel Enhanced Speedstep.
-
- To compile this driver as a module, choose M here: the
- module will be called acpi-cpufreq.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-comment "shared options"
-
-config X86_ACPI_CPUFREQ_PROC_INTF
- bool "/proc/acpi/processor/../performance interface (deprecated)"
- depends on PROC_FS
- depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K8_ACPI
- help
- This enables the deprecated /proc/acpi/processor/../performance
- interface. While it is helpful for debugging, the generic,
- cross-architecture cpufreq interfaces should be used.
-
- If in doubt, say N.
-
-config X86_P4_CLOCKMOD
- tristate "Intel Pentium 4 clock modulation"
- depends on EMBEDDED
- select CPU_FREQ_TABLE
- help
- This adds the clock modulation driver for Intel Pentium 4 / XEON
- processors. When enabled it will lower CPU temperature by skipping
- clocks.
-
- This driver should be only used in exceptional
- circumstances when very low power is needed because it causes severe
- slowdowns and noticeable latencies. Normally Speedstep should be used
- instead.
-
- To compile this driver as a module, choose M here: the
- module will be called p4-clockmod.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- Unless you are absolutely sure say N.
-
-
-config X86_SPEEDSTEP_LIB
- tristate
- default X86_P4_CLOCKMOD
-
-endif
-
-endmenu
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index f4548c93ccf..05c9936a16c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -43,8 +43,6 @@
static struct class *cpuid_class;
-#ifdef CONFIG_SMP
-
struct cpuid_command {
u32 reg;
u32 *data;
@@ -62,25 +60,11 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data)
{
struct cpuid_command cmd;
- preempt_disable();
- if (cpu == smp_processor_id()) {
- cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
- } else {
- cmd.reg = reg;
- cmd.data = data;
+ cmd.reg = reg;
+ cmd.data = data;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
- }
- preempt_enable();
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
}
-#else /* ! CONFIG_SMP */
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
-}
-
-#endif /* ! CONFIG_SMP */
static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
{
@@ -130,7 +114,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
static int cpuid_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
- struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
if (cpu >= NR_CPUS || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
@@ -150,32 +134,40 @@ static const struct file_operations cpuid_fops = {
.open = cpuid_open,
};
-static int cpuid_device_create(int i)
+static __cpuinit int cpuid_device_create(int cpu)
{
- int err = 0;
struct device *dev;
- dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i);
- if (IS_ERR(dev))
- err = PTR_ERR(dev);
- return err;
+ dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu),
+ "cpu%d", cpu);
+ return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+static void cpuid_device_destroy(int cpu)
+{
+ device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
}
-static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- cpuid_device_create(cpu);
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = cpuid_device_create(cpu);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+ cpuid_device_destroy(cpu);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
@@ -212,7 +204,7 @@ static int __init cpuid_init(void)
out_class:
i = 0;
for_each_online_cpu(i) {
- device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i));
+ cpuid_device_destroy(i);
}
class_destroy(cpuid_class);
out_chrdev:
@@ -226,7 +218,7 @@ static void __exit cpuid_exit(void)
int cpu = 0;
for_each_online_cpu(cpu)
- device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+ cpuid_device_destroy(cpu);
class_destroy(cpuid_class);
unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
diff --git a/arch/x86/kernel/crash_32.c b/arch/x86/kernel/crash.c
index 53589d1b1a0..8bb482ff091 100644
--- a/arch/x86/kernel/crash_32.c
+++ b/arch/x86/kernel/crash.c
@@ -1,5 +1,5 @@
/*
- * Architecture specific (i386) functions for kexec based crash dumps.
+ * Architecture specific (i386/x86_64) functions for kexec based crash dumps.
*
* Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
*
@@ -25,8 +25,11 @@
#include <linux/kdebug.h>
#include <asm/smp.h>
+#ifdef CONFIG_X86_32
#include <mach_ipi.h>
-
+#else
+#include <asm/mach_apic.h>
+#endif
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
@@ -38,7 +41,9 @@ static int crash_nmi_callback(struct notifier_block *self,
unsigned long val, void *data)
{
struct pt_regs *regs;
+#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
+#endif
int cpu;
if (val != DIE_NMI_IPI)
@@ -55,10 +60,12 @@ static int crash_nmi_callback(struct notifier_block *self,
return NOTIFY_STOP;
local_irq_disable();
+#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}
+#endif
crash_save_cpu(regs, cpu);
disable_local_APIC();
atomic_dec(&waiting_for_crash_ipi);
diff --git a/arch/x86/kernel/crash_64.c b/arch/x86/kernel/crash_64.c
deleted file mode 100644
index 13432a1ae90..00000000000
--- a/arch/x86/kernel/crash_64.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Architecture specific (x86_64) functions for kexec based crash dumps.
- *
- * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *
- * Copyright (C) IBM Corporation, 2004. All rights reserved.
- *
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/irq.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-#include <linux/kdebug.h>
-
-#include <asm/processor.h>
-#include <asm/hardirq.h>
-#include <asm/nmi.h>
-#include <asm/hw_irq.h>
-#include <asm/mach_apic.h>
-
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
-
-#ifdef CONFIG_SMP
-static atomic_t waiting_for_crash_ipi;
-
-static int crash_nmi_callback(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct pt_regs *regs;
- int cpu;
-
- if (val != DIE_NMI_IPI)
- return NOTIFY_OK;
-
- regs = ((struct die_args *)data)->regs;
- cpu = raw_smp_processor_id();
-
- /*
- * Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return NOTIFY_STOP;
- local_irq_disable();
-
- crash_save_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- for(;;)
- halt();
-
- return 1;
-}
-
-static void smp_send_nmi_allbutself(void)
-{
- send_IPI_allbutself(NMI_VECTOR);
-}
-
-/*
- * This code is a best effort heuristic to get the
- * other cpus to stop executing. So races with
- * cpu hotplug shouldn't matter.
- */
-
-static struct notifier_block crash_nmi_nb = {
- .notifier_call = crash_nmi_callback,
-};
-
-static void nmi_shootdown_cpus(void)
-{
- unsigned long msecs;
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- if (register_die_notifier(&crash_nmi_nb))
- return; /* return what? */
-
- /*
- * Ensure the new callback function is set before sending
- * out the NMI
- */
- wmb();
-
- smp_send_nmi_allbutself();
-
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
- /* Leave the nmi callback set */
- disable_local_APIC();
-}
-#else
-static void nmi_shootdown_cpus(void)
-{
- /* There are no cpus to shootdown */
-}
-#endif
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
- /*
- * This function is only called after the system
- * has panicked or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means shooting down the other cpus in
- * an SMP system.
- */
- /* The kernel is broken so disable interrupts */
- local_irq_disable();
-
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
- crashing_cpu = smp_processor_id();
- nmi_shootdown_cpus();
-
- if(cpu_has_apic)
- disable_local_APIC();
-
- disable_IO_APIC();
-
- crash_save_cpu(regs, smp_processor_id());
-}
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 32e75d0731a..72d0c56c1b4 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -47,6 +47,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!kdump_buf_page) {
printk(KERN_WARNING "Kdump: Kdump buffer page not"
" allocated\n");
+ kunmap_atomic(vaddr, KM_PTE0);
return -EFAULT;
}
copy_page(kdump_buf_page, vaddr);
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 3c86b979a40..18f500d185a 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -51,6 +51,13 @@ struct resource code_resource = {
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
+struct resource bss_resource = {
+ .name = "Kernel bss",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
static struct resource system_rom_resource = {
.name = "System ROM",
.start = 0xf0000,
@@ -254,7 +261,9 @@ static void __init probe_roms(void)
* and also for regions reported as reserved by the e820.
*/
static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+legacy_init_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource,
+ struct resource *bss_resource)
{
int i;
@@ -287,8 +296,10 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
*/
request_resource(res, code_resource);
request_resource(res, data_resource);
+ request_resource(res, bss_resource);
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
#endif
}
}
@@ -306,9 +317,11 @@ static int __init request_standard_resources(void)
printk("Setting up standard PCI resources\n");
if (efi_enabled)
- efi_initialize_iomem_resources(&code_resource, &data_resource);
+ efi_initialize_iomem_resources(&code_resource,
+ &data_resource, &bss_resource);
else
- legacy_init_iomem_resources(&code_resource, &data_resource);
+ legacy_init_iomem_resources(&code_resource,
+ &data_resource, &bss_resource);
/* EFI systems may still have VGA */
request_resource(&iomem_resource, &video_ram_resource);
@@ -705,7 +718,7 @@ void __init e820_register_memory(void)
int i;
/*
- * Search for the bigest gap in the low 32 bits of the e820
+ * Search for the biggest gap in the low 32 bits of the e820
* memory space.
*/
last = 0x100000000ull;
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 0f4d5e209e9..04698e0b056 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -24,7 +24,7 @@
#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
-#include <asm/bootsetup.h>
+#include <asm/setup.h>
#include <asm/sections.h>
struct e820map e820;
@@ -47,7 +47,7 @@ unsigned long end_pfn_map;
*/
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-extern struct resource code_resource, data_resource;
+extern struct resource code_resource, data_resource, bss_resource;
/* Check for some hardcoded bad areas that early boot is not allowed to touch */
static inline int bad_addr(unsigned long *addrp, unsigned long size)
@@ -68,10 +68,15 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
/* initrd */
#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
- addr < INITRD_START+INITRD_SIZE) {
- *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
- return 1;
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long ramdisk_end = ramdisk_image+ramdisk_size;
+
+ if (last >= ramdisk_image && addr < ramdisk_end) {
+ *addrp = PAGE_ALIGN(ramdisk_end);
+ return 1;
+ }
}
#endif
/* kernel code */
@@ -220,8 +225,10 @@ void __init e820_reserve_resources(void)
*/
request_resource(res, &code_resource);
request_resource(res, &data_resource);
+ request_resource(res, &bss_resource);
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
#endif
}
}
@@ -594,8 +601,8 @@ void __init setup_memory_region(void)
* Otherwise fake a memory map; one section from 0k->640k,
* the next section from 1mb->appropriate_mem_k
*/
- sanitize_e820_map(E820_MAP, &E820_MAP_NR);
- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
+ sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+ if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
early_panic("Cannot find a valid memory map");
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
e820_print_map("BIOS-e820");
@@ -723,3 +730,22 @@ __init void e820_setup_gap(void)
printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
pci_mem_start, gapstart, gapsize);
}
+
+int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
+{
+ int i;
+
+ if (slot < 0 || slot >= e820.nr_map)
+ return -1;
+ for (i = slot; i < e820.nr_map; i++) {
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ break;
+ }
+ if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
+ return -1;
+ *addr = e820.map[i].addr;
+ *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
+ max_pfn << PAGE_SHIFT) - *addr;
+ return i + 1;
+}
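
arch_get_ram_range() gives callers a resumable walk over the e820 map: start at a slot, skip non-RAM entries, clamp the size to max_pfn, and return the slot to continue from, or -1 when done. A self-contained sketch of that iteration contract, with invented types in place of struct e820map:

#include <stdio.h>
#include <stdint.h>

enum { RAM = 1, RESERVED = 2 };
struct range { uint64_t addr, size; int type; };

static int next_ram_range(const struct range *map, int nr, uint64_t limit,
			  int slot, uint64_t *addr, uint64_t *size)
{
	int i;

	if (slot < 0 || slot >= nr)
		return -1;
	for (i = slot; i < nr; i++)
		if (map[i].type == RAM)
			break;
	if (i == nr || map[i].addr > limit)
		return -1;
	*addr = map[i].addr;
	*size = (map[i].addr + map[i].size < limit ?
		 map[i].addr + map[i].size : limit) - *addr;
	return i + 1;		/* slot to pass in on the next call */
}

int main(void)
{
	const struct range map[] = {
		{ 0x0,      0xa0000,    RAM },
		{ 0xf0000,  0x10000,    RESERVED },
		{ 0x100000, 0x3ff00000, RAM },
	};
	uint64_t a, s;
	int slot = 0;

	while ((slot = next_ram_range(map, 3, 1ULL << 32, slot, &a, &s)) != -1)
		printf("RAM %#llx + %#llx\n",
		       (unsigned long long)a, (unsigned long long)s);
	return 0;
}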
diff --git a/arch/x86/kernel/early-quirks_64.c b/arch/x86/kernel/early-quirks.c
index 13aa4fd728f..88bb83ec895 100644
--- a/arch/x86/kernel/early-quirks_64.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -13,33 +13,41 @@
#include <linux/acpi.h>
#include <linux/pci_ids.h>
#include <asm/pci-direct.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
#include <asm/dma.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+
+#ifdef CONFIG_GART_IOMMU
+#include <asm/gart.h>
+#endif
static void __init via_bugs(void)
{
-#ifdef CONFIG_IOMMU
+#ifdef CONFIG_GART_IOMMU
if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
- !iommu_aperture_allowed) {
+ !gart_iommu_aperture_allowed) {
printk(KERN_INFO
- "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n");
- iommu_aperture_disabled = 1;
+ "Looks like a VIA chipset. Disabling IOMMU."
+ " Override with iommu=allowed\n");
+ gart_iommu_aperture_disabled = 1;
}
#endif
}
#ifdef CONFIG_ACPI
+#ifdef CONFIG_X86_IO_APIC
static int __init nvidia_hpet_check(struct acpi_table_header *header)
{
return 0;
}
-#endif
+#endif /* CONFIG_X86_IO_APIC */
+#endif /* CONFIG_ACPI */
static void __init nvidia_bugs(void)
{
#ifdef CONFIG_ACPI
+#ifdef CONFIG_X86_IO_APIC
/*
* All timer overrides on Nvidia are
* wrong unless HPET is enabled.
@@ -59,17 +67,20 @@ static void __init nvidia_bugs(void)
"try acpi_use_timer_override\n");
}
#endif
+#endif
/* RED-PEN skip them on mptables too? */
}
static void __init ati_bugs(void)
{
+#ifdef CONFIG_X86_IO_APIC
if (timer_over_8254 == 1) {
timer_over_8254 = 0;
printk(KERN_INFO
- "ATI board detected. Disabling timer routing over 8254.\n");
+ "ATI board detected. Disabling timer routing over 8254.\n");
}
+#endif
}
struct chipset {
@@ -104,7 +115,7 @@ void __init early_quirks(void)
if (class == 0xffffffff)
break;
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+ if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
continue;
vendor = read_pci_config(num, slot, func,
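
early_quirks() scans PCI bus 0 and dispatches per-vendor workarounds (via_bugs, nvidia_bugs, ati_bugs) from a small chipset table. A sketch of that vendor-to-handler table pattern; the IDs and handlers below are stand-ins, not a claim about the real table contents:

#include <stdio.h>
#include <stdint.h>

static void via_workaround(void)    { puts("applying VIA workaround"); }
static void nvidia_workaround(void) { puts("applying NVIDIA workaround"); }

struct quirk {
	uint16_t vendor;
	void (*handler)(void);
};

static const struct quirk quirks[] = {
	{ 0x1106, via_workaround },	/* placeholder VIA vendor ID */
	{ 0x10de, nvidia_workaround },	/* placeholder NVIDIA vendor ID */
	{ 0, NULL }
};

static void run_quirks(uint16_t vendor)
{
	const struct quirk *q;

	for (q = quirks; q->handler; q++)
		if (q->vendor == vendor)
			q->handler();
}

int main(void)
{
	run_quirks(0x10de);	/* stands in for the PCI config-space scan */
	return 0;
}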
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index fd9aff3f389..b7d6c23f287 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -6,15 +6,10 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/fcntl.h>
+#include <asm/setup.h>
#include <xen/hvc-console.h>
/* Simple VGA output */
-
-#ifdef __i386__
-#include <asm/setup.h>
-#else
-#include <asm/bootsetup.h>
-#endif
#define VGABASE (__ISA_IO_base + 0xb8000)
static int max_ypos = 25, max_xpos = 80;
@@ -234,10 +229,10 @@ static int __init setup_early_printk(char *buf)
early_serial_init(buf);
early_console = &early_serial_console;
} else if (!strncmp(buf, "vga", 3)
- && SCREEN_INFO.orig_video_isVGA == 1) {
- max_xpos = SCREEN_INFO.orig_video_cols;
- max_ypos = SCREEN_INFO.orig_video_lines;
- current_ypos = SCREEN_INFO.orig_y;
+ && boot_params.screen_info.orig_video_isVGA == 1) {
+ max_xpos = boot_params.screen_info.orig_video_cols;
+ max_ypos = boot_params.screen_info.orig_video_lines;
+ current_ypos = boot_params.screen_info.orig_y;
early_console = &early_vga_console;
} else if (!strncmp(buf, "simnow", 6)) {
simnow_init(buf + 6);
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 2452c6fbe99..e2be78f4939 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -331,11 +331,13 @@ void __init efi_init(void)
memset(&efi, 0, sizeof(efi) );
memset(&efi_phys, 0, sizeof(efi_phys));
- efi_phys.systab = EFI_SYSTAB;
- memmap.phys_map = EFI_MEMMAP;
- memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
- memmap.desc_version = EFI_MEMDESC_VERSION;
- memmap.desc_size = EFI_MEMDESC_SIZE;
+ efi_phys.systab =
+ (efi_system_table_t *)boot_params.efi_info.efi_systab;
+ memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
+ memmap.nr_map = boot_params.efi_info.efi_memmap_size/
+ boot_params.efi_info.efi_memdesc_size;
+ memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
+ memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
efi.systab = (efi_system_table_t *)
boot_ioremap((unsigned long) efi_phys.systab,
@@ -446,7 +448,8 @@ void __init efi_init(void)
printk(KERN_ERR PFX "Could not map the runtime service table!\n");
/* Map the EFI memory map for use until paging_init() */
- memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
+ memmap.map = boot_ioremap(boot_params.efi_info.efi_memmap,
+ boot_params.efi_info.efi_memmap_size);
if (memmap.map == NULL)
printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
@@ -600,7 +603,8 @@ void __init efi_enter_virtual_mode(void)
void __init
efi_initialize_iomem_resources(struct resource *code_resource,
- struct resource *data_resource)
+ struct resource *data_resource,
+ struct resource *bss_resource)
{
struct resource *res;
efi_memory_desc_t *md;
@@ -672,6 +676,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->type == EFI_CONVENTIONAL_MEMORY) {
request_resource(res, code_resource);
request_resource(res, data_resource);
+ request_resource(res, bss_resource);
#ifdef CONFIG_KEXEC
request_resource(res, &crashk_res);
#endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8099fea0a72..dc7f938e501 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -437,7 +437,7 @@ ldt_ss:
* is still available to implement the setting of the high
* 16-bits in the INTERRUPT_RETURN paravirt-op.
*/
- cmpl $0, paravirt_ops+PARAVIRT_enabled
+ cmpl $0, pv_info+PARAVIRT_enabled
jne restore_nocheck
#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index f1cacd4897f..3a058bb1640 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -988,7 +988,7 @@ child_rip:
movq %rsi, %rdi
call *%rax
# exit
- xorl %edi, %edi
+ mov %eax, %edi
call do_exit
CFI_ENDPROC
ENDPROC(child_rip)
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 47496a40e84..ce703e21c91 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -24,12 +24,19 @@
#include <acpi/acpi_bus.h>
#endif
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
+/*
+ * which logical CPU number maps to which CPU (physical APIC ID)
+ *
+ * The following static array is used during kernel startup
+ * and the x86_cpu_to_apicid_ptr contains the address of the
+ * array during this time.  It is zeroed once the per_cpu
+ * data area is set up.
+ */
+u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata
= { [0 ... NR_CPUS-1] = BAD_APICID };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
-
-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_cpu_to_apicid_ptr;
+DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
struct genapic __read_mostly *genapic = &apic_flat;
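
The hunk above replaces the global x86_cpu_to_apicid[] array with a per-CPU variable, but keeps a temporary __initdata array plus x86_cpu_to_apicid_ptr for the window before the per-CPU areas exist. A userspace sketch of that bootstrap-array-then-switch pattern, with illustrative names:

#include <stdio.h>
#include <string.h>

#define NR  4
#define BAD 0xff

static unsigned char apicid_init[NR] = { BAD, BAD, BAD, BAD };
static unsigned char *apicid_ptr = apicid_init;	/* valid only early on */
static unsigned char apicid_percpu[NR];		/* the "real" storage */

static void record_apicid(int cpu, unsigned char id)
{
	if (apicid_ptr)			/* early: per-CPU area not set up yet */
		apicid_ptr[cpu] = id;
	else				/* late: use the per-CPU copy */
		apicid_percpu[cpu] = id;
}

static void percpu_areas_ready(void)
{
	memcpy(apicid_percpu, apicid_init, sizeof(apicid_percpu));
	apicid_ptr = NULL;		/* from now on, only per-CPU storage */
}

int main(void)
{
	record_apicid(0, 0);		/* goes through the bootstrap array */
	percpu_areas_ready();
	record_apicid(1, 2);		/* goes through per-CPU storage */
	printf("cpu0=%u cpu1=%u\n", apicid_percpu[0], apicid_percpu[1]);
	return 0;
}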
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index ecb01eefdd2..07352b74bda 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -52,7 +52,6 @@ static void flat_init_apic_ldr(void)
num = smp_processor_id();
id = 1UL << num;
- x86_cpu_to_log_apicid[num] = id;
apic_write(APIC_DFR, APIC_DFR_FLAT);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
@@ -173,7 +172,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
*/
cpu = first_cpu(cpumask);
if ((unsigned)cpu < NR_CPUS)
- return x86_cpu_to_apicid[cpu];
+ return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8561f626eda..6b3469311e4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -14,7 +14,6 @@
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
-#include <asm/bootsetup.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
@@ -36,26 +35,15 @@ static void __init clear_bss(void)
(unsigned long) __bss_stop - (unsigned long) __bss_start);
}
-#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR 0x20
-#define OLD_CL_MAGIC 0xA33F
-#define OLD_CL_OFFSET 0x22
-
static void __init copy_bootdata(char *real_mode_data)
{
- unsigned long new_data;
char * command_line;
- memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
- new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
- if (!new_data) {
- if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
- return;
- }
- new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
+ memcpy(&boot_params, real_mode_data, sizeof boot_params);
+ if (boot_params.hdr.cmd_line_ptr) {
+ command_line = __va(boot_params.hdr.cmd_line_ptr);
+ memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
- command_line = __va(new_data);
- memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
void __init x86_64_start_kernel(char * real_mode_data)
@@ -70,7 +58,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
for (i = 0; i < IDT_ENTRIES; i++)
set_intr_gate(i, early_idt_handler);
- asm volatile("lidt %0" :: "m" (idt_descr));
+ load_idt((const struct desc_ptr *)&idt_descr);
early_printk("Kernel alive\n");
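
copy_bootdata() now copies the whole zero-page into boot_params and follows hdr.cmd_line_ptr, instead of probing the old magic offsets. A sketch of that flow with toy structures; the field sizes and layout here are invented, not the real struct boot_params:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define CMDLINE_SIZE 64

struct setup_header { uint32_t cmd_line_ptr; };
struct bootparams   { struct setup_header hdr; };

static struct bootparams boot_params;
static char boot_command_line[CMDLINE_SIZE];

static void copy_bootdata_sketch(const void *real_mode_data,
				 const char *cmdline_mem)
{
	memcpy(&boot_params, real_mode_data, sizeof(boot_params));
	if (boot_params.hdr.cmd_line_ptr)
		/* in the kernel this is __va(cmd_line_ptr); here the
		 * caller simply hands in the command-line memory */
		strncpy(boot_command_line, cmdline_mem, CMDLINE_SIZE - 1);
}

int main(void)
{
	struct bootparams fake = { .hdr = { .cmd_line_ptr = 0x90000 } };

	copy_bootdata_sketch(&fake, "console=ttyS0 ro root=/dev/sda1");
	printf("cmdline: %s\n", boot_command_line);
	return 0;
}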
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 9150ca9b5f8..374b7ece896 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -51,6 +51,15 @@
*/
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
+/*
+ * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
+ * pagetables from above the 16MB DMA limit, so we'll have to set
+ * up pagetables 16MB more (worst-case):
+ */
+#ifdef CONFIG_DEBUG_PAGEALLOC
+LOW_PAGES = LOW_PAGES + 0x1000000
+#endif
+
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
@@ -70,22 +79,30 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
*/
.section .text.head,"ax",@progbits
ENTRY(startup_32)
+ /* check to see if KEEP_SEGMENTS flag is meaningful */
+ cmpw $0x207, BP_version(%esi)
+ jb 1f
+
+ /* test KEEP_SEGMENTS flag to see if the bootloader is asking
+ us not to reload segments */
+ testb $(1<<6), BP_loadflags(%esi)
+ jnz 2f
/*
* Set segments to known values.
*/
- cld
- lgdt boot_gdt_descr - __PAGE_OFFSET
+1: lgdt boot_gdt_descr - __PAGE_OFFSET
movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
+2:
/*
* Clear BSS first so that there are no surprises...
- * No need to cld as DF is already clear from cld above...
*/
+ cld
xorl %eax,%eax
movl $__bss_start - __PAGE_OFFSET,%edi
movl $__bss_stop - __PAGE_OFFSET,%ecx
@@ -107,18 +124,42 @@ ENTRY(startup_32)
movsl
movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
andl %esi,%esi
- jnz 2f # New command line protocol
- cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
- jne 1f
- movzwl OLD_CL_OFFSET,%esi
- addl $(OLD_CL_BASE_ADDR),%esi
-2:
+ jz 1f # No command line
movl $(boot_command_line - __PAGE_OFFSET),%edi
movl $(COMMAND_LINE_SIZE/4),%ecx
rep
movsl
1:
+#ifdef CONFIG_PARAVIRT
+ cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+ jb default_entry
+
+ /* Paravirt-compatible boot parameters. Look to see what architecture
+ we're booting under. */
+ movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+ cmpl $num_subarch_entries, %eax
+ jae bad_subarch
+
+ movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+ subl $__PAGE_OFFSET, %eax
+ jmp *%eax
+
+bad_subarch:
+WEAK(lguest_entry)
+WEAK(xen_entry)
+ /* Unknown implementation; there's really
+ nothing we can do at this point. */
+ ud2a
+.data
+subarch_entries:
+ .long default_entry /* normal x86/PC */
+ .long lguest_entry /* lguest hypervisor */
+ .long xen_entry /* Xen hypervisor */
+num_subarch_entries = (. - subarch_entries) / 4
+.previous
+#endif /* CONFIG_PARAVIRT */
+
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond _end. The variable
@@ -131,6 +172,7 @@ ENTRY(startup_32)
*/
page_pde_offset = (__PAGE_OFFSET >> 20);
+default_entry:
movl $(pg0 - __PAGE_OFFSET), %edi
movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
@@ -443,6 +485,7 @@ early_page_fault:
early_fault:
cld
#ifdef CONFIG_PRINTK
+ pusha
movl $(__KERNEL_DS),%eax
movl %eax,%ds
movl %eax,%es
@@ -534,8 +577,15 @@ int_msg:
.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
fault_msg:
- .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
- .asciz "Stack: %p %p %p %p %p %p %p %p\n"
+ .ascii \
+/* fault info: */ "BUG: Int %d: CR2 %p\n" \
+/* pusha regs: */ " EDI %p ESI %p EBP %p ESP %p\n" \
+ " EBX %p EDX %p ECX %p EAX %p\n" \
+/* fault frame: */ " err %p EIP %p CS %p flg %p\n" \
+ \
+ "Stack: %p %p %p %p %p %p %p %p\n" \
+ " %p %p %p %p %p %p %p %p\n" \
+ " %p %p %p %p %p %p %p %p\n"
#include "../../x86/xen/xen-head.S"
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f8367074da0..53303f2e547 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -69,12 +69,15 @@ static inline void hpet_clear_mapping(void)
* HPET command line enable / disable
*/
static int boot_hpet_disable;
+int hpet_force_user;
static int __init hpet_setup(char* str)
{
if (str) {
if (!strncmp("disable", str, 7))
boot_hpet_disable = 1;
+ if (!strncmp("force", str, 5))
+ hpet_force_user = 1;
}
return 1;
}
@@ -350,7 +353,7 @@ static int hpet_clocksource_register(void)
*
* hpet period is in femto seconds per cycle
* so we need to convert this to ns/cyc units
- * aproximated by mult/2^shift
+ * approximated by mult/2^shift
*
* fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
* fsec/cyc * 1ns/1000000fsec * 2^shift = mult
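
The hpet= handler above now accepts "force" alongside "disable", each setting its own flag. The same parsing, lifted into a runnable userspace snippet:

#include <stdio.h>
#include <string.h>

static int boot_hpet_disable;
static int hpet_force_user;

static int hpet_setup(const char *str)
{
	if (str) {
		if (!strncmp("disable", str, 7))
			boot_hpet_disable = 1;
		if (!strncmp("force", str, 5))
			hpet_force_user = 1;
	}
	return 1;
}

int main(void)
{
	hpet_setup("force");
	printf("disable=%d force=%d\n", boot_hpet_disable, hpet_force_user);
	return 0;
}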
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index e3d4b73bfdb..edd39ccf139 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,4 +1,5 @@
#include <linux/module.h>
+#include <asm/semaphore.h>
#include <asm/checksum.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c
index 56c1f114710..bfaff28fb13 100644
--- a/arch/x86/kernel/i387_64.c
+++ b/arch/x86/kernel/i387_64.c
@@ -92,13 +92,14 @@ int save_i387(struct _fpstate __user *buf)
if (task_thread_info(tsk)->status & TS_USEDFPU) {
err = save_i387_checking((struct i387_fxsave_struct __user *)buf);
if (err) return err;
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
- } else {
- if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+ } else {
+ if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
sizeof(struct i387_fxsave_struct)))
return -1;
- }
- return 1;
+ }
+ return 1;
}
/*
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 5cc8841ca2c..a42c8074532 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -86,7 +86,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
* On UP the PIT can serve all of the possible timer functions. On SMP systems
* it can be solely used for the global tick.
*
- * The profiling and update capabilites are switched off once the local apic is
+ * The profiling and update capabilities are switched off once the local apic is
* registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
* !using_apic_timer decisions in do_timer_interrupt_hook()
*/
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index 679bb33acbf..f634fc715c9 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -349,7 +349,11 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
* so allow interrupt sharing.
*/
-static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
+static struct irqaction fpu_irq = {
+ .handler = math_error_irq,
+ .mask = CPU_MASK_NONE,
+ .name = "fpu",
+};
void __init init_ISA_irqs (void)
{
@@ -399,7 +403,8 @@ void __init native_init_IRQ(void)
int vector = FIRST_EXTERNAL_VECTOR + i;
if (i >= NR_IRQS)
break;
- if (vector != SYSCALL_VECTOR)
+ /* SYSCALL_VECTOR was reserved in trap_init. */
+ if (!test_bit(vector, used_vectors))
set_intr_gate(vector, interrupt[i]);
}
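
The irqaction hunks replace positional initializers with designated ones, so the remaining fields default to zero and a future reordering of struct irqaction cannot silently mis-assign values. A small illustration with a toy struct, not the kernel definition:

#include <stdio.h>

typedef int (*handler_t)(int, void *);

struct action {
	handler_t handler;
	unsigned long flags;
	const char *name;
	void *dev_id;
};

static int math_error(int irq, void *dev) { (void)irq; (void)dev; return 0; }

/* Positional form: every field must appear in declaration order. */
static struct action fpu_positional = { math_error, 0, "fpu", NULL };

/* Designated form: only the fields that matter, by name. */
static struct action fpu_designated = {
	.handler = math_error,
	.name    = "fpu",
};

int main(void)
{
	printf("%s / %s\n", fpu_positional.name, fpu_designated.name);
	return 0;
}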
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index eb72976cc13..3f27ea0b981 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -395,7 +395,11 @@ device_initcall(i8259A_init_sysfs);
* IRQ2 is cascade interrupt to second interrupt controller
*/
-static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
+static struct irqaction irq2 = {
+ .handler = no_action,
+ .mask = CPU_MASK_NONE,
+ .name = "cascade",
+};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
[0 ... IRQ0_VECTOR - 1] = -1,
[IRQ0_VECTOR] = 0,
diff --git a/arch/x86/kernel/init_task_32.c b/arch/x86/kernel/init_task.c
index d26fc063a76..468c9c43784 100644
--- a/arch/x86/kernel/init_task_32.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
-
EXPORT_SYMBOL(init_mm);
/*
@@ -25,7 +24,7 @@ EXPORT_SYMBOL(init_mm);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
+union thread_union init_thread_union
__attribute__((__section__(".data.init_task"))) =
{ INIT_THREAD_INFO(init_task) };
@@ -35,12 +34,14 @@ union thread_union init_thread_union
* All other task structs will be allocated on slabs in fork.c
*/
struct task_struct init_task = INIT_TASK(init_task);
-
EXPORT_SYMBOL(init_task);
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's.
- */
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
diff --git a/arch/x86/kernel/init_task_64.c b/arch/x86/kernel/init_task_64.c
deleted file mode 100644
index 4ff33d4f855..00000000000
--- a/arch/x86/kernel/init_task_64.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
-/* Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index e2f4a1c6854..f35c6eb33da 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -378,7 +378,7 @@ static struct irq_cpu_info {
#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
static cpumask_t balance_irq_affinity[NR_IRQS] = {
[0 ... NR_IRQS-1] = CPU_MASK_ALL
@@ -584,7 +584,7 @@ tryanotherirq:
imbalance = move_this_load;
- /* For physical_balance case, we accumlated both load
+ /* For physical_balance case, we accumulated both load
* values in the one of the siblings cpu_irq[],
* to use the same code for physical and logical processors
* as much as possible.
@@ -598,7 +598,7 @@ tryanotherirq:
* (A+B)/2 vs B
*/
load = CPU_IRQ(min_loaded) >> 1;
- for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+ for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
if (load > CPU_IRQ(j)) {
/* This won't change cpu_sibling_map[min_loaded] */
load = CPU_IRQ(j);
@@ -1198,7 +1198,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }
static int __assign_irq_vector(int irq)
{
static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
- int vector, offset, i;
+ int vector, offset;
BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
@@ -1215,11 +1215,8 @@ next:
}
if (vector == current_vector)
return -ENOSPC;
- if (vector == SYSCALL_VECTOR)
+ if (test_and_set_bit(vector, used_vectors))
goto next;
- for (i = 0; i < NR_IRQ_VECTORS; i++)
- if (irq_vector[i] == vector)
- goto next;
current_vector = vector;
current_offset = offset;
@@ -1296,6 +1293,11 @@ static void __init setup_IO_APIC_irqs(void)
continue;
}
+ if (!first_notcon) {
+ apic_printk(APIC_VERBOSE, " not connected.\n");
+ first_notcon = 1;
+ }
+
entry.trigger = irq_trigger(idx);
entry.polarity = irq_polarity(idx);
@@ -2290,6 +2292,12 @@ static inline void __init check_timer(void)
void __init setup_IO_APIC(void)
{
+ int i;
+
+ /* Reserve all the system vectors. */
+ for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+ set_bit(i, used_vectors);
+
enable_IO_APIC();
if (acpi_ioapic)
@@ -2467,7 +2475,7 @@ void destroy_irq(unsigned int irq)
}
/*
- * MSI mesage composition
+ * MSI message composition
*/
#ifdef CONFIG_PCI_MSI
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
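
setup_IO_APIC() now pre-reserves the system vectors in the used_vectors bitmap, and __assign_irq_vector() claims a free vector with test_and_set_bit() instead of rescanning irq_vector[]. A userspace sketch of that reserve-then-allocate bitmap, with plain C bit twiddling in place of the kernel bit ops:

#include <stdio.h>

#define NR_VECTORS 256
#define FIRST_SYSTEM_VECTOR 239

static unsigned long used[NR_VECTORS / (8 * sizeof(unsigned long))];

static int test_and_set(int bit)
{
	unsigned long *w = &used[bit / (8 * sizeof(unsigned long))];
	unsigned long m = 1UL << (bit % (8 * sizeof(unsigned long)));
	int old = (*w & m) != 0;

	*w |= m;
	return old;
}

static int assign_vector(int start)
{
	int v;

	for (v = start; v < NR_VECTORS; v++)
		if (!test_and_set(v))
			return v;	/* first vector nobody has claimed */
	return -1;
}

int main(void)
{
	int i;

	for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
		test_and_set(i);	/* reserve the system vectors */
	printf("first free vector: %d\n", assign_vector(32));
	return 0;
}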
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 966fa106249..953328b55a3 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -31,6 +31,7 @@
#include <linux/sysdev.h>
#include <linux/msi.h>
#include <linux/htirq.h>
+#include <linux/dmar.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
@@ -875,6 +876,10 @@ static void __init setup_IO_APIC_irqs(void)
apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
continue;
}
+ if (!first_notcon) {
+ apic_printk(APIC_VERBOSE, " not connected.\n");
+ first_notcon = 1;
+ }
irq = pin_2_irq(idx, apic, pin);
add_pin_to_irq(irq, apic, pin);
@@ -885,7 +890,7 @@ static void __init setup_IO_APIC_irqs(void)
}
if (!first_notcon)
- apic_printk(APIC_VERBOSE," not connected.\n");
+ apic_printk(APIC_VERBOSE, " not connected.\n");
}
/*
@@ -1766,7 +1771,7 @@ __setup("no_timer_check", notimercheck);
/*
*
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * IRQs that are handled by the PIC in the MPS IOAPIC case.
* - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
* Linux doesn't really care, as it's not actually used
* for any interrupt handling anyway.
@@ -1845,7 +1850,7 @@ static struct sysdev_class ioapic_sysdev_class = {
static int __init ioapic_init_sysfs(void)
{
struct sys_device * dev;
- int i, size, error = 0;
+ int i, size, error;
error = sysdev_class_register(&ioapic_sysdev_class);
if (error)
@@ -1854,12 +1859,11 @@ static int __init ioapic_init_sysfs(void)
for (i = 0; i < nr_ioapics; i++ ) {
size = sizeof(struct sys_device) + nr_ioapic_registers[i]
* sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
if (!mp_ioapic_data[i]) {
printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
continue;
}
- memset(mp_ioapic_data[i], 0, size);
dev = &mp_ioapic_data[i]->dev;
dev->id = i;
dev->cls = &ioapic_sysdev_class;
@@ -1918,7 +1922,7 @@ void destroy_irq(unsigned int irq)
}
/*
- * MSI mesage composition
+ * MSI message composition
*/
#ifdef CONFIG_PCI_MSI
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
@@ -2028,8 +2032,64 @@ void arch_teardown_msi_irq(unsigned int irq)
destroy_irq(irq);
}
-#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct msi_msg msg;
+ unsigned int dest;
+ cpumask_t tmp;
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ dmar_msi_read(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ dmar_msi_write(irq, &msg);
+ irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+ .name = "DMAR_MSI",
+ .unmask = dmar_msi_unmask,
+ .mask = dmar_msi_mask,
+ .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = dmar_msi_set_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+ int ret;
+ struct msi_msg msg;
+
+ ret = msi_compose_msg(NULL, irq, &msg);
+ if (ret < 0)
+ return ret;
+ dmar_msi_write(irq, &msg);
+ set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
+ return 0;
+}
+#endif
+
+#endif /* CONFIG_PCI_MSI */
/*
* Hypertransport interrupt support
*/
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index e173b763f14..d3fde94f734 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -255,9 +255,17 @@ int show_interrupts(struct seq_file *p, void *v)
}
if (i < NR_IRQS) {
+ unsigned any_count = 0;
+
spin_lock_irqsave(&irq_desc[i].lock, flags);
+#ifndef CONFIG_SMP
+ any_count = kstat_irqs(i);
+#else
+ for_each_online_cpu(j)
+ any_count |= kstat_cpu(j).irqs[i];
+#endif
action = irq_desc[i].action;
- if (!action)
+ if (!action && !any_count)
goto skip;
seq_printf(p, "%3d: ",i);
#ifndef CONFIG_SMP
@@ -268,10 +276,12 @@ int show_interrupts(struct seq_file *p, void *v)
#endif
seq_printf(p, " %8s", irq_desc[i].chip->name);
seq_printf(p, "-%-8s", irq_desc[i].name);
- seq_printf(p, " %s", action->name);
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
+ if (action) {
+ seq_printf(p, " %s", action->name);
+ while ((action = action->next) != NULL)
+ seq_printf(p, ", %s", action->name);
+ }
seq_putc(p, '\n');
skip:
@@ -280,14 +290,41 @@ skip:
seq_printf(p, "NMI: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", nmi_count(j));
- seq_putc(p, '\n');
+ seq_printf(p, " Non-maskable interrupts\n");
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).apic_timer_irqs);
- seq_putc(p, '\n');
+ seq_printf(p, " Local timer interrupts\n");
#endif
+#ifdef CONFIG_SMP
+ seq_printf(p, "RES: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).irq_resched_count);
+ seq_printf(p, " Rescheduling interrupts\n");
+ seq_printf(p, "CAL: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).irq_call_count);
+ seq_printf(p, " function call interrupts\n");
+ seq_printf(p, "TLB: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).irq_tlb_count);
+ seq_printf(p, " TLB shootdowns\n");
+#endif
+ seq_printf(p, "TRM: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).irq_thermal_count);
+ seq_printf(p, " Thermal event interrupts\n");
+ seq_printf(p, "SPU: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat,j).irq_spurious_count);
+ seq_printf(p, " Spurious interrupts\n");
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 865669efc54..6b5c730d67b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -62,9 +62,17 @@ int show_interrupts(struct seq_file *p, void *v)
}
if (i < NR_IRQS) {
+ unsigned any_count = 0;
+
spin_lock_irqsave(&irq_desc[i].lock, flags);
+#ifndef CONFIG_SMP
+ any_count = kstat_irqs(i);
+#else
+ for_each_online_cpu(j)
+ any_count |= kstat_cpu(j).irqs[i];
+#endif
action = irq_desc[i].action;
- if (!action)
+ if (!action && !any_count)
goto skip;
seq_printf(p, "%3d: ",i);
#ifndef CONFIG_SMP
@@ -76,9 +84,11 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, " %8s", irq_desc[i].chip->name);
seq_printf(p, "-%-8s", irq_desc[i].name);
- seq_printf(p, " %s", action->name);
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
+ if (action) {
+ seq_printf(p, " %s", action->name);
+ while ((action = action->next) != NULL)
+ seq_printf(p, ", %s", action->name);
+ }
seq_putc(p, '\n');
skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
@@ -86,11 +96,37 @@ skip:
seq_printf(p, "NMI: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
- seq_putc(p, '\n');
+ seq_printf(p, " Non-maskable interrupts\n");
seq_printf(p, "LOC: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
- seq_putc(p, '\n');
+ seq_printf(p, " Local timer interrupts\n");
+#ifdef CONFIG_SMP
+ seq_printf(p, "RES: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
+ seq_printf(p, " Rescheduling interrupts\n");
+ seq_printf(p, "CAL: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
+ seq_printf(p, " function call interrupts\n");
+ seq_printf(p, "TLB: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
+ seq_printf(p, " TLB shootdowns\n");
+#endif
+ seq_printf(p, "TRM: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
+ seq_printf(p, " Thermal event interrupts\n");
+ seq_printf(p, "THR: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
+ seq_printf(p, " Threshold APIC interrupts\n");
+ seq_printf(p, "SPU: ");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
+ seq_printf(p, " Spurious interrupts\n");
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
}
return 0;
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index e7d0d3c2ef6..d87a523070d 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -41,6 +41,13 @@ void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+struct kretprobe_blackpoint kretprobe_blacklist[] = {
+ {"__switch_to", }, /* This function switches only current task, but
+ doesn't switch kernel stack.*/
+ {NULL, NULL} /* Terminator */
+};
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
/* insert a jmp code */
static __always_inline void set_jmp_op(void *from, void *to)
{
@@ -557,12 +564,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
resume_execution(cur, regs, kcb);
regs->eflags |= kcb->kprobe_saved_eflags;
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
- if (raw_irqs_disabled_flags(regs->eflags))
- trace_hardirqs_off();
- else
- trace_hardirqs_on();
-#endif
+ trace_hardirqs_fixup_flags(regs->eflags);
/*Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -584,7 +586,7 @@ out:
return 1;
}
-static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -666,7 +668,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_GPF:
- case DIE_PAGE_FAULT:
/* kprobe_running() needs smp_processor_id() */
preempt_disable();
if (kprobe_running() &&
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 62e28e52d78..3db3611933d 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -48,6 +48,13 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+struct kretprobe_blackpoint kretprobe_blacklist[] = {
+ {"__switch_to", }, /* This function switches only current task, but
+ doesn't switch kernel stack.*/
+ {NULL, NULL} /* Terminator */
+};
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@@ -544,12 +551,7 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
resume_execution(cur, regs, kcb);
regs->eflags |= kcb->kprobe_saved_rflags;
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
- if (raw_irqs_disabled_flags(regs->eflags))
- trace_hardirqs_off();
- else
- trace_hardirqs_on();
-#endif
+ trace_hardirqs_fixup_flags(regs->eflags);
/* Restore the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -657,7 +659,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_GPF:
- case DIE_PAGE_FAULT:
/* kprobe_running() needs smp_processor_id() */
preempt_disable();
if (kprobe_running() &&
diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c
index a8b18421863..9ff90a27c45 100644
--- a/arch/x86/kernel/ldt_32.c
+++ b/arch/x86/kernel/ldt_32.c
@@ -92,13 +92,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
struct mm_struct * old_mm;
int retval = 0;
- init_MUTEX(&mm->context.sem);
+ mutex_init(&mm->context.lock);
mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
- down(&old_mm->context.sem);
+ mutex_lock(&old_mm->context.lock);
retval = copy_ldt(&mm->context, &old_mm->context);
- up(&old_mm->context.sem);
+ mutex_unlock(&old_mm->context.lock);
}
return retval;
}
@@ -130,7 +130,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
- down(&mm->context.sem);
+ mutex_lock(&mm->context.lock);
size = mm->context.size*LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
@@ -138,7 +138,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
err = 0;
if (copy_to_user(ptr, mm->context.ldt, size))
err = -EFAULT;
- up(&mm->context.sem);
+ mutex_unlock(&mm->context.lock);
if (err < 0)
goto error_return;
if (size != bytecount) {
@@ -194,7 +194,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
goto out;
}
- down(&mm->context.sem);
+ mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= mm->context.size) {
error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
if (error < 0)
@@ -221,7 +221,7 @@ install:
error = 0;
out_unlock:
- up(&mm->context.sem);
+ mutex_unlock(&mm->context.lock);
out:
return error;
}
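
mm->context.sem was a semaphore used purely as a mutex; the conversion above renames it to context.lock and switches to mutex_init/mutex_lock/mutex_unlock without touching the critical sections. A pthread analogue of the same change:

#include <stdio.h>
#include <pthread.h>

struct ldt_context {
	pthread_mutex_t lock;	/* was a binary semaphore */
	int size;
};

static int init_new_context(struct ldt_context *ctx)
{
	pthread_mutex_init(&ctx->lock, NULL);	/* was init_MUTEX(&sem) */
	ctx->size = 0;
	return 0;
}

static int read_ldt_size(struct ldt_context *ctx)
{
	int size;

	pthread_mutex_lock(&ctx->lock);		/* was down(&sem) */
	size = ctx->size;
	pthread_mutex_unlock(&ctx->lock);	/* was up(&sem) */
	return size;
}

int main(void)
{
	struct ldt_context ctx;

	init_new_context(&ctx);
	printf("ldt size: %d\n", read_ldt_size(&ctx));
	return 0;
}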
diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c
index 3796523d616..60e57abb8e9 100644
--- a/arch/x86/kernel/ldt_64.c
+++ b/arch/x86/kernel/ldt_64.c
@@ -96,13 +96,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
struct mm_struct * old_mm;
int retval = 0;
- init_MUTEX(&mm->context.sem);
+ mutex_init(&mm->context.lock);
mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
- down(&old_mm->context.sem);
+ mutex_lock(&old_mm->context.lock);
retval = copy_ldt(&mm->context, &old_mm->context);
- up(&old_mm->context.sem);
+ mutex_unlock(&old_mm->context.lock);
}
return retval;
}
@@ -133,7 +133,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
- down(&mm->context.sem);
+ mutex_lock(&mm->context.lock);
size = mm->context.size*LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
@@ -141,7 +141,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
err = 0;
if (copy_to_user(ptr, mm->context.ldt, size))
err = -EFAULT;
- up(&mm->context.sem);
+ mutex_unlock(&mm->context.lock);
if (err < 0)
goto error_return;
if (size != bytecount) {
@@ -193,7 +193,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
goto out;
}
- down(&mm->context.sem);
+ mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= (unsigned)mm->context.size) {
error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
if (error < 0)
@@ -223,7 +223,7 @@ install:
error = 0;
out_unlock:
- up(&mm->context.sem);
+ mutex_unlock(&mm->context.lock);
out:
return error;
}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index deda9a221cf..11b935f4f88 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -10,6 +10,7 @@
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/numa.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -148,24 +149,14 @@ NORET_TYPE void machine_kexec(struct kimage *image)
image->start, cpu_has_pae);
}
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
+void arch_crash_save_vmcoreinfo(void)
{
- unsigned long size, base;
- size = memparse(arg, &arg);
- if (*arg == '@') {
- base = memparse(arg+1, &arg);
- /* FIXME: Do I want a sanity check
- * to validate the memory range?
- */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- return 0;
+#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_X86_PAE
+ VMCOREINFO_CONFIG(X86_PAE);
+#endif
}
-early_param("crashkernel", parse_crashkernel);
+
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index cd1899a2f0c..aa3d2c8f773 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -10,6 +10,7 @@
#include <linux/kexec.h>
#include <linux/string.h>
#include <linux/reboot.h>
+#include <linux/numa.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -230,30 +231,13 @@ NORET_TYPE void machine_kexec(struct kimage *image)
image->start);
}
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init setup_crashkernel(char *arg)
+void arch_crash_save_vmcoreinfo(void)
{
- unsigned long size, base;
- char *p;
- if (!arg)
- return -EINVAL;
- size = memparse(arg, &p);
- if (arg == p)
- return -EINVAL;
- if (*p == '@') {
- base = memparse(p+1, &p);
- /* FIXME: Do I want a sanity check to validate the
- * memory range? Yes you do, but it's too early for
- * e820 -AK */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- return 0;
+ VMCOREINFO_SYMBOL(init_level4_pgt);
+
+#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
}
-early_param("crashkernel", setup_crashkernel);
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 09cf7811035..09c315214a5 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -132,7 +132,7 @@ static struct ucode_cpu_info {
static void collect_cpu_info(int cpu_num)
{
- struct cpuinfo_x86 *c = cpu_data + cpu_num;
+ struct cpuinfo_x86 *c = &cpu_data(cpu_num);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
unsigned int val[2];
@@ -522,7 +522,7 @@ static struct platform_device *microcode_pdev;
static int cpu_request_microcode(int cpu)
{
char name[30];
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
void *buf;
unsigned long size;
@@ -570,7 +570,7 @@ static int cpu_request_microcode(int cpu)
static int apply_microcode_check_cpu(int cpu)
{
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
cpumask_t old;
unsigned int val[2];
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 13abb4ebfb7..7a05a7f6099 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -1001,7 +1001,7 @@ void __init mp_config_acpi_legacy_irqs (void)
/*
* Use the default configuration for the IRQs 0-15. Unless
- * overriden by (MADT) interrupt source override entries.
+ * overridden by (MADT) interrupt source override entries.
*/
for (i = 0; i < 16; i++) {
int idx;
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index 8bf0ca03ac8..ef4aab12358 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0;
/* Processor that is doing the boot up */
unsigned int boot_cpu_id = -1U;
+EXPORT_SYMBOL(boot_cpu_id);
+
/* Internal processor count */
unsigned int num_processors __cpuinitdata = 0;
@@ -86,7 +88,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
{
int cpu;
cpumask_t tmp_map;
@@ -123,7 +125,18 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
cpu = 0;
}
bios_cpu_apicid[cpu] = m->mpc_apicid;
- x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+ /*
+ * We get called early in the start_kernel initialization
+ * process when the per_cpu data area is not yet set up, so we
+ * use a static array that is removed after the per_cpu data
+ * area is created.
+ */
+ if (x86_cpu_to_apicid_ptr) {
+ u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
+ x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+ } else {
+ per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+ }
cpu_set(cpu, cpu_possible_map);
cpu_set(cpu, cpu_present_map);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index c044de310b6..ee6eba4ecfe 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -112,7 +112,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
- struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
if (cpu >= NR_CPUS || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
@@ -133,37 +133,42 @@ static const struct file_operations msr_fops = {
.open = msr_open,
};
-static int msr_device_create(int i)
+static int __cpuinit msr_device_create(int cpu)
{
- int err = 0;
struct device *dev;
- dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i);
- if (IS_ERR(dev))
- err = PTR_ERR(dev);
- return err;
+ dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu),
+ "msr%d", cpu);
+ return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}
-static int msr_class_cpu_callback(struct notifier_block *nfb,
+static void msr_device_destroy(int cpu)
+{
+ device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+}
+
+static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- msr_device_create(cpu);
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = msr_device_create(cpu);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+ msr_device_destroy(cpu);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata msr_class_cpu_notifier =
-{
+static struct notifier_block __cpuinitdata msr_class_cpu_notifier = {
.notifier_call = msr_class_cpu_callback,
};
@@ -196,7 +201,7 @@ static int __init msr_init(void)
out_class:
i = 0;
for_each_online_cpu(i)
- device_destroy(msr_class, MKDEV(MSR_MAJOR, i));
+ msr_device_destroy(i);
class_destroy(msr_class);
out_chrdev:
unregister_chrdev(MSR_MAJOR, "cpu/msr");
@@ -208,7 +213,7 @@ static void __exit msr_exit(void)
{
int cpu = 0;
for_each_online_cpu(cpu)
- device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+ msr_device_destroy(cpu);
class_destroy(msr_class);
unregister_chrdev(MSR_MAJOR, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
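
The msr notifier now creates the device in the CPU_UP_PREPARE phase and reports failure as NOTIFY_BAD, so a failed allocation vetoes the hotplug instead of being ignored at ONLINE time. A sketch of that error-propagating callback; the constants below are local stand-ins, not the kernel's values:

#include <stdio.h>

enum { CPU_UP_PREPARE, CPU_UP_CANCELED, CPU_DEAD };
enum { NOTIFY_OK, NOTIFY_BAD };

static int device_create_for(int cpu)   { return cpu == 3 ? -1 : 0; }
static void device_destroy_for(int cpu) { (void)cpu; }

static int cpu_callback(unsigned long action, int cpu)
{
	int err = 0;

	switch (action) {
	case CPU_UP_PREPARE:
		err = device_create_for(cpu);	/* may fail */
		break;
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		device_destroy_for(cpu);
		break;
	}
	return err ? NOTIFY_BAD : NOTIFY_OK;
}

int main(void)
{
	printf("cpu1: %d\n", cpu_callback(CPU_UP_PREPARE, 1));
	printf("cpu3: %d\n", cpu_callback(CPU_UP_PREPARE, 3));	/* vetoed */
	return 0;
}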
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index f803ed0ed1c..600fd404e44 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -51,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
static int endflag __initdata = 0;
-#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
* CPUs during the test make them busy.
*/
static __init void nmi_cpu_busy(void *data)
{
+#ifdef CONFIG_SMP
local_irq_enable_in_hardirq();
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
@@ -67,8 +67,8 @@ static __init void nmi_cpu_busy(void *data)
care if they get somewhat less cycles. */
while (endflag == 0)
mb();
-}
#endif
+}
static int __init check_nmi_watchdog(void)
{
diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c
index 739cfb207dd..6a80d67c212 100644
--- a/arch/x86/kernel/paravirt_32.c
+++ b/arch/x86/kernel/paravirt_32.c
@@ -42,32 +42,33 @@ void _paravirt_nop(void)
static void __init default_banner(void)
{
printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- paravirt_ops.name);
+ pv_info.name);
}
char *memory_setup(void)
{
- return paravirt_ops.memory_setup();
+ return pv_init_ops.memory_setup();
}
/* Simple instruction patching code. */
-#define DEF_NATIVE(name, code) \
- extern const char start_##name[], end_##name[]; \
- asm("start_" #name ": " code "; end_" #name ":")
-
-DEF_NATIVE(irq_disable, "cli");
-DEF_NATIVE(irq_enable, "sti");
-DEF_NATIVE(restore_fl, "push %eax; popf");
-DEF_NATIVE(save_fl, "pushf; pop %eax");
-DEF_NATIVE(iret, "iret");
-DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
-DEF_NATIVE(read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(clts, "clts");
-DEF_NATIVE(read_tsc, "rdtsc");
-
-DEF_NATIVE(ud2a, "ud2a");
+#define DEF_NATIVE(ops, name, code) \
+ extern const char start_##ops##_##name[], end_##ops##_##name[]; \
+ asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
+
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
+DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+
+/* Undefined instruction for dealing with missing ops pointers. */
+static const unsigned char ud2a[] = { 0x0f, 0x0b };
static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned ret;
switch(type) {
-#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
- SITE(irq_disable);
- SITE(irq_enable);
- SITE(restore_fl);
- SITE(save_fl);
- SITE(iret);
- SITE(irq_enable_sysexit);
- SITE(read_cr2);
- SITE(read_cr3);
- SITE(write_cr3);
- SITE(clts);
- SITE(read_tsc);
+#define SITE(ops, x) \
+ case PARAVIRT_PATCH(ops.x): \
+ start = start_##ops##_##x; \
+ end = end_##ops##_##x; \
+ goto patch_site
+
+ SITE(pv_irq_ops, irq_disable);
+ SITE(pv_irq_ops, irq_enable);
+ SITE(pv_irq_ops, restore_fl);
+ SITE(pv_irq_ops, save_fl);
+ SITE(pv_cpu_ops, iret);
+ SITE(pv_cpu_ops, irq_enable_sysexit);
+ SITE(pv_mmu_ops, read_cr2);
+ SITE(pv_mmu_ops, read_cr3);
+ SITE(pv_mmu_ops, write_cr3);
+ SITE(pv_cpu_ops, clts);
+ SITE(pv_cpu_ops, read_tsc);
#undef SITE
patch_site:
ret = paravirt_patch_insns(ibuf, len, start, end);
break;
- case PARAVIRT_PATCH(make_pgd):
- case PARAVIRT_PATCH(make_pte):
- case PARAVIRT_PATCH(pgd_val):
- case PARAVIRT_PATCH(pte_val):
-#ifdef CONFIG_X86_PAE
- case PARAVIRT_PATCH(make_pmd):
- case PARAVIRT_PATCH(pmd_val):
-#endif
- /* These functions end up returning exactly what
- they're passed, in the same registers. */
- ret = paravirt_patch_nop();
- break;
-
default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
return 5;
}
-unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
unsigned long addr, unsigned len)
{
struct branch *b = insnbuf;
@@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
return 5;
}
+/* Neat trick to map patch type back to the call within the
+ * corresponding structure. */
+static void *get_call_destination(u8 type)
+{
+ struct paravirt_patch_template tmpl = {
+ .pv_init_ops = pv_init_ops,
+ .pv_time_ops = pv_time_ops,
+ .pv_cpu_ops = pv_cpu_ops,
+ .pv_irq_ops = pv_irq_ops,
+ .pv_apic_ops = pv_apic_ops,
+ .pv_mmu_ops = pv_mmu_ops,
+ };
+ return *((void **)&tmpl + type);
+}
+
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
unsigned long addr, unsigned len)
{
- void *opfunc = *((void **)&paravirt_ops + type);
+ void *opfunc = get_call_destination(type);
unsigned ret;
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
- ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
+ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == paravirt_nop)
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
- else if (type == PARAVIRT_PATCH(iret) ||
- type == PARAVIRT_PATCH(irq_enable_sysexit))
+ else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+ type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
/* If operation requires a jmp, then jmp */
- ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
+ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
else
/* Otherwise call the function; assume target could
clobber any caller-save reg */
@@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
void init_IRQ(void)
{
- paravirt_ops.init_IRQ();
+ pv_irq_ops.init_IRQ();
}
static void native_flush_tlb(void)
@@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void);
static int __init print_banner(void)
{
- paravirt_ops.banner();
+ pv_init_ops.banner();
return 0;
}
core_initcall(print_banner);
@@ -273,47 +281,96 @@ int paravirt_disable_iospace(void)
return ret;
}
-struct paravirt_ops paravirt_ops = {
+static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+
+static inline void enter_lazy(enum paravirt_lazy_mode mode)
+{
+ BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+ BUG_ON(preemptible());
+
+ x86_write_percpu(paravirt_lazy_mode, mode);
+}
+
+void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+{
+ BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
+ BUG_ON(preemptible());
+
+ x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+}
+
+void paravirt_enter_lazy_mmu(void)
+{
+ enter_lazy(PARAVIRT_LAZY_MMU);
+}
+
+void paravirt_leave_lazy_mmu(void)
+{
+ paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+}
+
+void paravirt_enter_lazy_cpu(void)
+{
+ enter_lazy(PARAVIRT_LAZY_CPU);
+}
+
+void paravirt_leave_lazy_cpu(void)
+{
+ paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+}
+
+enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
+{
+ return x86_read_percpu(paravirt_lazy_mode);
+}
+
+struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+};
- .patch = native_patch,
+struct pv_init_ops pv_init_ops = {
+ .patch = native_patch,
.banner = default_banner,
.arch_setup = paravirt_nop,
.memory_setup = machine_specific_memory_setup,
+};
+
+struct pv_time_ops pv_time_ops = {
+ .time_init = hpet_time_init,
.get_wallclock = native_get_wallclock,
.set_wallclock = native_set_wallclock,
- .time_init = hpet_time_init,
+ .sched_clock = native_sched_clock,
+ .get_cpu_khz = native_calculate_cpu_khz,
+};
+
+struct pv_irq_ops pv_irq_ops = {
.init_IRQ = native_init_IRQ,
+ .save_fl = native_save_fl,
+ .restore_fl = native_restore_fl,
+ .irq_disable = native_irq_disable,
+ .irq_enable = native_irq_enable,
+ .safe_halt = native_safe_halt,
+ .halt = native_halt,
+};
+struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg,
.clts = native_clts,
.read_cr0 = native_read_cr0,
.write_cr0 = native_write_cr0,
- .read_cr2 = native_read_cr2,
- .write_cr2 = native_write_cr2,
- .read_cr3 = native_read_cr3,
- .write_cr3 = native_write_cr3,
.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = native_write_cr4,
- .save_fl = native_save_fl,
- .restore_fl = native_restore_fl,
- .irq_disable = native_irq_disable,
- .irq_enable = native_irq_enable,
- .safe_halt = native_safe_halt,
- .halt = native_halt,
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
- .sched_clock = native_sched_clock,
- .get_cpu_khz = native_calculate_cpu_khz,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
.load_gdt = native_load_gdt,
@@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = {
.write_idt_entry = write_dt_entry,
.load_esp0 = native_load_esp0,
+ .irq_enable_sysexit = native_irq_enable_sysexit,
+ .iret = native_iret,
+
.set_iopl_mask = native_set_iopl_mask,
.io_delay = native_io_delay,
+ .lazy_mode = {
+ .enter = paravirt_nop,
+ .leave = paravirt_nop,
+ },
+};
+
+struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
.apic_write = native_apic_write,
.apic_write_atomic = native_apic_write_atomic,
@@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = {
.setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop,
#endif
- .set_lazy_mode = paravirt_nop,
+};
+struct pv_mmu_ops pv_mmu_ops = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
+ .read_cr2 = native_read_cr2,
+ .write_cr2 = native_write_cr2,
+ .read_cr3 = native_read_cr3,
+ .write_cr3 = native_write_cr3,
+
.flush_tlb_user = native_flush_tlb,
.flush_tlb_kernel = native_flush_tlb_global,
.flush_tlb_single = native_flush_tlb_single,
@@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = {
.make_pte = native_make_pte,
.make_pgd = native_make_pgd,
- .irq_enable_sysexit = native_irq_enable_sysexit,
- .iret = native_iret,
-
.dup_mmap = paravirt_nop,
.exit_mmap = paravirt_nop,
.activate_mm = paravirt_nop,
+
+ .lazy_mode = {
+ .enter = paravirt_nop,
+ .leave = paravirt_nop,
+ },
};
-EXPORT_SYMBOL(paravirt_ops);
+EXPORT_SYMBOL_GPL(pv_time_ops);
+EXPORT_SYMBOL_GPL(pv_cpu_ops);
+EXPORT_SYMBOL_GPL(pv_mmu_ops);
+EXPORT_SYMBOL_GPL(pv_apic_ops);
+EXPORT_SYMBOL_GPL(pv_info);
+EXPORT_SYMBOL(pv_irq_ops);
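The hunks above split the old monolithic paravirt_ops into per-area structures (pv_info, pv_init_ops, pv_time_ops, pv_irq_ops, pv_cpu_ops, pv_apic_ops, pv_mmu_ops), so a backend only replaces the group of hooks it actually virtualizes, and each structure can carry its own export tag as in the EXPORT_SYMBOL*() lines above. A minimal stand-alone C sketch of the same pattern (hypothetical names, not kernel code):

	#include <stdio.h>

	/* Hypothetical analogue of the split: one ops structure per functional area. */
	struct irq_ops  { void (*irq_disable)(void); void (*irq_enable)(void); };
	struct time_ops { unsigned long (*sched_clock)(void); };

	static void native_irq_disable(void) { puts("cli"); }
	static void native_irq_enable(void)  { puts("sti"); }
	static unsigned long native_sched_clock(void) { return 0; }

	/* Native defaults, in the spirit of pv_irq_ops / pv_time_ops above. */
	static struct irq_ops  irq_ops  = { native_irq_disable, native_irq_enable };
	static struct time_ops time_ops = { native_sched_clock };

	/* A guest backend overrides only the area it cares about. */
	static void guest_irq_disable(void) { puts("hypercall: mask virtual IRQs"); }

	int main(void)
	{
		irq_ops.irq_disable = guest_irq_disable;	/* patch one group only */
		irq_ops.irq_disable();
		irq_ops.irq_enable();				/* still native */
		printf("clock=%lu\n", time_ops.sched_clock());	/* untouched area */
		return 0;
	}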
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 71da01e73f0..6bf1f716909 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -35,7 +35,8 @@
#include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/delay.h>
-#include <asm/iommu.h>
+#include <linux/scatterlist.h>
+#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/tce.h>
#include <asm/pci-direct.h>
@@ -221,10 +222,10 @@ static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
return npages;
}
-static inline int translate_phb(struct pci_dev* dev)
+static inline int translation_enabled(struct iommu_table *tbl)
{
- int disabled = bus_info[dev->bus->number].translation_disabled;
- return !disabled;
+ /* only PHBs with translation enabled have an IOMMU table */
+ return (tbl != NULL);
}
static void iommu_range_reserve(struct iommu_table *tbl,
@@ -384,33 +385,36 @@ static void calgary_unmap_sg(struct device *dev,
struct scatterlist *sglist, int nelems, int direction)
{
struct iommu_table *tbl = find_iommu_table(dev);
+ struct scatterlist *s;
+ int i;
- if (!translate_phb(to_pci_dev(dev)))
+ if (!translation_enabled(tbl))
return;
- while (nelems--) {
+ for_each_sg(sglist, s, nelems, i) {
unsigned int npages;
- dma_addr_t dma = sglist->dma_address;
- unsigned int dmalen = sglist->dma_length;
+ dma_addr_t dma = s->dma_address;
+ unsigned int dmalen = s->dma_length;
if (dmalen == 0)
break;
npages = num_dma_pages(dma, dmalen);
iommu_free(tbl, dma, npages);
- sglist++;
}
}
static int calgary_nontranslate_map_sg(struct device* dev,
struct scatterlist *sg, int nelems, int direction)
{
+ struct scatterlist *s;
int i;
- for (i = 0; i < nelems; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+ for_each_sg(sg, s, nelems, i) {
+ struct page *p = sg_page(s);
+
+ BUG_ON(!p);
+ s->dma_address = virt_to_bus(sg_virt(s));
s->dma_length = s->length;
}
return nelems;
@@ -420,19 +424,19 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
struct iommu_table *tbl = find_iommu_table(dev);
+ struct scatterlist *s;
unsigned long vaddr;
unsigned int npages;
unsigned long entry;
int i;
- if (!translate_phb(to_pci_dev(dev)))
+ if (!translation_enabled(tbl))
return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
- for (i = 0; i < nelems; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
+ for_each_sg(sg, s, nelems, i) {
+ BUG_ON(!sg_page(s));
- vaddr = (unsigned long)page_address(s->page) + s->offset;
+ vaddr = (unsigned long) sg_virt(s);
npages = num_dma_pages(vaddr, s->length);
entry = iommu_range_alloc(tbl, npages);
@@ -454,9 +458,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
return nelems;
error:
calgary_unmap_sg(dev, sg, nelems, direction);
- for (i = 0; i < nelems; i++) {
- sg[i].dma_address = bad_dma_address;
- sg[i].dma_length = 0;
+ for_each_sg(sg, s, nelems, i) {
+ sg->dma_address = bad_dma_address;
+ sg->dma_length = 0;
}
return 0;
}
@@ -472,7 +476,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
- if (translate_phb(to_pci_dev(dev)))
+ if (translation_enabled(tbl))
dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
else
dma_handle = virt_to_bus(vaddr);
@@ -486,7 +490,7 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
- if (!translate_phb(to_pci_dev(dev)))
+ if (!translation_enabled(tbl))
return;
npages = num_dma_pages(dma_handle, size);
@@ -511,7 +515,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
goto error;
memset(ret, 0, size);
- if (translate_phb(to_pci_dev(dev))) {
+ if (translation_enabled(tbl)) {
/* set up tces to cover the allocated range */
mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
if (mapping == bad_dma_address)
@@ -1192,7 +1196,7 @@ static int __init calgary_init(void)
{
int ret;
struct pci_dev *dev = NULL;
- void *tce_space;
+ struct calgary_bus_info *info;
ret = calgary_locate_bbars();
if (ret)
@@ -1204,12 +1208,14 @@ static int __init calgary_init(void)
break;
if (!is_cal_pci_dev(dev->device))
continue;
- if (!translate_phb(dev)) {
+
+ info = &bus_info[dev->bus->number];
+ if (info->translation_disabled) {
calgary_init_one_nontraslated(dev);
continue;
}
- tce_space = bus_info[dev->bus->number].tce_space;
- if (!tce_space && !translate_empty_slots)
+
+ if (!info->tce_space && !translate_empty_slots)
continue;
ret = calgary_init_one(dev);
@@ -1227,11 +1233,13 @@ error:
break;
if (!is_cal_pci_dev(dev->device))
continue;
- if (!translate_phb(dev)) {
+
+ info = &bus_info[dev->bus->number];
+ if (info->translation_disabled) {
pci_dev_put(dev);
continue;
}
- if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
+ if (!info->tce_space && !translate_empty_slots)
continue;
calgary_disable_translation(dev);
@@ -1544,7 +1552,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
static int __init calgary_fixup_tce_spaces(void)
{
struct pci_dev *dev = NULL;
- void *tce_space;
+ struct calgary_bus_info *info;
if (no_iommu || swiotlb || !calgary_detected)
return -ENODEV;
@@ -1557,11 +1565,12 @@ static int __init calgary_fixup_tce_spaces(void)
break;
if (!is_cal_pci_dev(dev->device))
continue;
- if (!translate_phb(dev))
+
+ info = &bus_info[dev->bus->number];
+ if (info->translation_disabled)
continue;
- tce_space = bus_info[dev->bus->number].tce_space;
- if (!tce_space)
+ if (!info->tce_space)
continue;
calgary_fixup_one_tce_space(dev);
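Throughout the Calgary changes above, direct sg[i] indexing and s->page dereferences are replaced by for_each_sg() together with sg_page()/sg_virt()/sg_phys(), which keeps working once scatterlists are chained rather than laid out as a flat array. An illustrative sketch of the idiom (not part of this patch):

	#include <linux/kernel.h>
	#include <linux/scatterlist.h>

	/* Walk a scatterlist the way the converted code above does. */
	static unsigned long total_sg_bytes(struct scatterlist *sgl, int nents)
	{
		struct scatterlist *s;
		unsigned long total = 0;
		int i;

		for_each_sg(sgl, s, nents, i) {	/* follows chained lists, unlike &sgl[i] */
			BUG_ON(!sg_page(s));	/* every entry must carry a page */
			total += s->length;
		}
		return total;
	}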
diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c
index 0aae2f3847a..51330321a5d 100644
--- a/arch/x86/kernel/pci-dma_32.c
+++ b/arch/x86/kernel/pci-dma_32.c
@@ -12,7 +12,6 @@
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/module.h>
-#include <linux/pci.h>
#include <asm/io.h>
struct dma_coherent_mem {
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index 9576a2eb375..aa805b11b24 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -7,11 +7,12 @@
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/module.h>
+#include <linux/dmar.h>
#include <asm/io.h>
-#include <asm/iommu.h>
+#include <asm/gart.h>
#include <asm/calgary.h>
-int iommu_merge __read_mostly = 0;
+int iommu_merge __read_mostly = 1;
EXPORT_SYMBOL(iommu_merge);
dma_addr_t bad_dma_address __read_mostly;
@@ -51,11 +52,9 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
{
struct page *page;
int node;
-#ifdef CONFIG_PCI
- if (dev->bus == &pci_bus_type)
- node = pcibus_to_node(to_pci_dev(dev)->bus);
- else
-#endif
+
+ node = dev_to_node(dev);
+ if (node == -1)
node = numa_node_id();
if (node < first_node(node_online_map))
@@ -276,7 +275,7 @@ __init int iommu_setup(char *p)
swiotlb = 1;
#endif
-#ifdef CONFIG_IOMMU
+#ifdef CONFIG_GART_IOMMU
gart_parse_options(p);
#endif
@@ -299,14 +298,16 @@ void __init pci_iommu_alloc(void)
* The order of these functions is important for
* fall-back/fail-over reasons
*/
-#ifdef CONFIG_IOMMU
- iommu_hole_init();
+#ifdef CONFIG_GART_IOMMU
+ gart_iommu_hole_init();
#endif
#ifdef CONFIG_CALGARY_IOMMU
detect_calgary();
#endif
+ detect_intel_iommu();
+
#ifdef CONFIG_SWIOTLB
pci_swiotlb_init();
#endif
@@ -318,7 +319,9 @@ static int __init pci_iommu_init(void)
calgary_iommu_init();
#endif
-#ifdef CONFIG_IOMMU
+ intel_iommu_init();
+
+#ifdef CONFIG_GART_IOMMU
gart_iommu_init();
#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4918c575d58..06bcba53604 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -8,6 +8,7 @@
* See Documentation/DMA-mapping.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU General Public License v2 only.
*/
#include <linux/types.h>
@@ -23,22 +24,23 @@
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/kdebug.h>
+#include <linux/scatterlist.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
-#include <asm/iommu.h>
+#include <asm/gart.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/k8.h>
-unsigned long iommu_bus_base; /* GART remapping area (physical) */
+static unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area bytes */
static unsigned long iommu_pages; /* .. and in pages */
-u32 *iommu_gatt_base; /* Remapping table */
+static u32 *iommu_gatt_base; /* Remapping table */
/* If this is disabled the IOMMU will use an optimized flushing strategy
of only flushing when an mapping is reused. With it true the GART is flushed
@@ -133,8 +135,8 @@ static void flush_gart(void)
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
-int iommu_leak_pages = 20;
-void dump_leak(void)
+static int iommu_leak_pages = 20;
+static void dump_leak(void)
{
int i;
static int dump;
@@ -278,10 +280,10 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
*/
static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
+ struct scatterlist *s;
int i;
- for (i = 0; i < nents; i++) {
- struct scatterlist *s = &sg[i];
+ for_each_sg(sg, s, nents, i) {
if (!s->dma_length || !s->length)
break;
gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
@@ -292,15 +294,15 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
int nents, int dir)
{
+ struct scatterlist *s;
int i;
#ifdef CONFIG_IOMMU_DEBUG
printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- unsigned long addr = page_to_phys(s->page) + s->offset;
+ for_each_sg(sg, s, nents, i) {
+ unsigned long addr = sg_phys(s);
if (nonforced_iommu(dev, addr, s->length)) {
addr = dma_map_area(dev, addr, s->length, dir);
if (addr == bad_dma_address) {
@@ -319,24 +321,23 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
}
/* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
+static int __dma_map_cont(struct scatterlist *start, int nelems,
struct scatterlist *sout, unsigned long pages)
{
unsigned long iommu_start = alloc_iommu(pages);
unsigned long iommu_page = iommu_start;
+ struct scatterlist *s;
int i;
if (iommu_start == -1)
return -1;
-
- for (i = start; i < stopat; i++) {
- struct scatterlist *s = &sg[i];
+
+ for_each_sg(start, s, nelems, i) {
unsigned long pages, addr;
unsigned long phys_addr = s->dma_address;
- BUG_ON(i > start && s->offset);
- if (i == start) {
- *sout = *s;
+ BUG_ON(s != start && s->offset);
+ if (s == start) {
sout->dma_address = iommu_bus_base;
sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
sout->dma_length = s->length;
@@ -357,30 +358,32 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
return 0;
}
-static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
+static inline int dma_map_cont(struct scatterlist *start, int nelems,
struct scatterlist *sout,
unsigned long pages, int need)
{
- if (!need) {
- BUG_ON(stopat - start != 1);
- *sout = sg[start];
- sout->dma_length = sg[start].length;
+ if (!need) {
+ BUG_ON(nelems != 1);
+ sout->dma_address = start->dma_address;
+ sout->dma_length = start->length;
return 0;
- }
- return __dma_map_cont(sg, start, stopat, sout, pages);
+ }
+ return __dma_map_cont(start, nelems, sout, pages);
}
/*
* DMA map all entries in a scatterlist.
* Merge chunks that have page aligned sizes into a continuous mapping.
*/
-int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ int dir)
{
int i;
int out;
int start;
unsigned long pages = 0;
int need = 0, nextneed;
+ struct scatterlist *s, *ps, *start_sg, *sgmap;
if (nents == 0)
return 0;
@@ -390,9 +393,10 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
out = 0;
start = 0;
- for (i = 0; i < nents; i++) {
- struct scatterlist *s = &sg[i];
- dma_addr_t addr = page_to_phys(s->page) + s->offset;
+ start_sg = sgmap = sg;
+ ps = NULL; /* shut up gcc */
+ for_each_sg(sg, s, nents, i) {
+ dma_addr_t addr = sg_phys(s);
s->dma_address = addr;
BUG_ON(s->length == 0);
@@ -400,34 +404,38 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
/* Handle the previous not yet processed entries */
if (i > start) {
- struct scatterlist *ps = &sg[i-1];
/* Can only merge when the last chunk ends on a page
boundary and the new one doesn't have an offset. */
if (!iommu_merge || !nextneed || !need || s->offset ||
- (ps->offset + ps->length) % PAGE_SIZE) {
- if (dma_map_cont(sg, start, i, sg+out, pages,
- need) < 0)
+ (ps->offset + ps->length) % PAGE_SIZE) {
+ if (dma_map_cont(start_sg, i - start, sgmap,
+ pages, need) < 0)
goto error;
out++;
+ sgmap = sg_next(sgmap);
pages = 0;
- start = i;
+ start = i;
+ start_sg = s;
}
}
need = nextneed;
pages += to_pages(s->offset, s->length);
+ ps = s;
}
- if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
+ if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0)
goto error;
out++;
flush_gart();
- if (out < nents)
- sg[out].dma_length = 0;
+ if (out < nents) {
+ sgmap = sg_next(sgmap);
+ sgmap->dma_length = 0;
+ }
return out;
error:
flush_gart();
- gart_unmap_sg(dev, sg, nents, dir);
+ gart_unmap_sg(dev, sg, out, dir);
/* When it was forced or merged try again in a dumb way */
if (force_iommu || iommu_merge) {
out = dma_map_sg_nonforce(dev, sg, nents, dir);
@@ -437,8 +445,8 @@ error:
if (panic_on_overflow)
panic("dma_map_sg: overflow on %lu pages\n", pages);
iommu_full(dev, pages << PAGE_SHIFT, dir);
- for (i = 0; i < nents; i++)
- sg[i].dma_address = bad_dma_address;
+ for_each_sg(sg, s, nents, i)
+ s->dma_address = bad_dma_address;
return 0;
}
@@ -619,12 +627,12 @@ void __init gart_iommu_init(void)
return;
/* Did we detect a different HW IOMMU? */
- if (iommu_detected && !iommu_aperture)
+ if (iommu_detected && !gart_iommu_aperture)
return;
if (no_iommu ||
(!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
- !iommu_aperture ||
+ !gart_iommu_aperture ||
(no_agp && init_k8_gatt(&info) < 0)) {
if (end_pfn > MAX_DMA32_PFN) {
printk(KERN_ERR "WARNING more than 4GB of memory "
@@ -725,9 +733,9 @@ void __init gart_parse_options(char *p)
fix_aperture = 0;
/* duplicated from pci-dma.c */
if (!strncmp(p,"force",5))
- iommu_aperture_allowed = 1;
+ gart_iommu_aperture_allowed = 1;
if (!strncmp(p,"allowed",7))
- iommu_aperture_allowed = 1;
+ gart_iommu_aperture_allowed = 1;
if (!strncmp(p, "memaper", 7)) {
fallback_aper_force = 1;
p += 7;
diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c
index 2a34c6c025a..ab08e183222 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu_64.c
@@ -5,8 +5,9 @@
#include <linux/pci.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
-#include <asm/iommu.h>
+#include <asm/gart.h>
#include <asm/processor.h>
#include <asm/dma.h>
@@ -57,12 +58,12 @@ static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, int direction)
{
+ struct scatterlist *s;
int i;
- for (i = 0; i < nents; i++ ) {
- struct scatterlist *s = &sg[i];
- BUG_ON(!s->page);
- s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+ for_each_sg(sg, s, nents, i) {
+ BUG_ON(!sg_page(s));
+ s->dma_address = virt_to_bus(sg_virt(s));
if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
return 0;
s->dma_length = s->length;
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index b2f405ea7c8..102866d729a 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/dma-mapping.h>
-#include <asm/iommu.h>
+#include <asm/gart.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 097aeafce5f..7b899584d29 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -295,34 +295,52 @@ static int __init idle_setup(char *str)
}
early_param("idle", idle_setup);
-void show_regs(struct pt_regs * regs)
+void __show_registers(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
+ unsigned long esp;
+ unsigned short ss, gs;
+
+ if (user_mode_vm(regs)) {
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ savesegment(gs, gs);
+ } else {
+ esp = (unsigned long) (&regs->esp);
+ savesegment(ss, ss);
+ savesegment(gs, gs);
+ }
printk("\n");
- printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
- printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+ printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+ task_pid_nr(current), current->comm,
+ print_tainted(), init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+
+ printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+ 0xffff & regs->xcs, regs->eip, regs->eflags,
+ smp_processor_id());
print_symbol("EIP is at %s\n", regs->eip);
- if (user_mode_vm(regs))
- printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
- printk(" EFLAGS: %08lx %s (%s %.*s)\n",
- regs->eflags, print_tainted(), init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
- regs->eax,regs->ebx,regs->ecx,regs->edx);
- printk("ESI: %08lx EDI: %08lx EBP: %08lx",
- regs->esi, regs->edi, regs->ebp);
- printk(" DS: %04x ES: %04x FS: %04x\n",
- 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff,
+ regs->xfs & 0xffff, gs, ss);
+
+ if (!all)
+ return;
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4_safe();
- printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+ cr0, cr2, cr3, cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
@@ -330,10 +348,16 @@ void show_regs(struct pt_regs * regs)
get_debugreg(d3, 3);
printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
d0, d1, d2, d3);
+
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+ printk("DR6: %08lx DR7: %08lx\n",
+ d6, d7);
+}
+void show_regs(struct pt_regs *regs)
+{
+ __show_registers(regs, 1);
show_trace(NULL, regs, &regs->esp);
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7352d4b377e..6309b275cb9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -581,7 +581,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
*
* Kprobes not supported here. Set the probe on schedule instead.
*/
-__kprobes struct task_struct *
+struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 0cecd7513c9..ff5431cc03e 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -165,7 +165,7 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
seg &= ~7UL;
- down(&child->mm->context.sem);
+ mutex_lock(&child->mm->context.lock);
if (unlikely((seg >> 3) >= child->mm->context.size))
addr = -1L; /* bogus selector, access would fault */
else {
@@ -179,7 +179,7 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
addr &= 0xffff;
addr += base;
}
- up(&child->mm->context.sem);
+ mutex_unlock(&child->mm->context.lock);
}
return addr;
}
@@ -524,11 +524,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
ret = 0;
break;
- case PTRACE_DETACH:
- /* detach a process that was attached. */
- ret = ptrace_detach(child, data);
- break;
-
case PTRACE_GETREGS: { /* Get all gp regs from the child. */
if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
ret = -EIO;
@@ -637,7 +632,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
/* User-mode eip? */
info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL;
- /* Send us the fakey SIGTRAP */
+ /* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
}
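The context.sem changes above follow the mm_context_t conversion from a semaphore to a mutex: the LDT lookup is a short, sleepable critical section, so mutex_lock()/mutex_unlock() replace down()/up(). A sketch of the locking pattern in isolation (example data, not the ptrace code):

	#include <linux/mutex.h>

	static DEFINE_MUTEX(example_lock);
	static unsigned long example_size;

	static unsigned long read_example_size(void)
	{
		unsigned long size;

		mutex_lock(&example_lock);	/* may sleep; never use from IRQ context */
		size = example_size;
		mutex_unlock(&example_lock);
		return size;
	}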
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index c0cac42df3b..607085f3f08 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -103,7 +103,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r
seg &= ~7UL;
- down(&child->mm->context.sem);
+ mutex_lock(&child->mm->context.lock);
if (unlikely((seg >> 3) >= child->mm->context.size))
addr = -1L; /* bogus selector, access would fault */
else {
@@ -117,7 +117,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r
addr &= 0xffff;
addr += base;
}
- up(&child->mm->context.sem);
+ mutex_unlock(&child->mm->context.lock);
}
return addr;
@@ -500,11 +500,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
ret = 0;
break;
- case PTRACE_DETACH:
- /* detach a process that was attached. */
- ret = ptrace_detach(child, data);
- break;
-
case PTRACE_GETREGS: { /* Get all gp regs from the child. */
if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
sizeof(struct user_regs_struct))) {
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d769e204f94..fab30e13483 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -45,9 +45,12 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
if (!(config & 0x2))
pci_write_config_byte(dev, 0xf4, config);
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH,
+ quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH,
+ quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH,
+ quirk_intel_irqbalance);
#endif
#if defined(CONFIG_HPET_TIMER)
@@ -56,7 +59,9 @@ unsigned long force_hpet_address;
static enum {
NONE_FORCE_HPET_RESUME,
OLD_ICH_FORCE_HPET_RESUME,
- ICH_FORCE_HPET_RESUME
+ ICH_FORCE_HPET_RESUME,
+ VT8237_FORCE_HPET_RESUME,
+ NVIDIA_FORCE_HPET_RESUME,
} force_hpet_resume_type;
static void __iomem *rcba_base;
@@ -146,17 +151,17 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
- ich_force_enable_hpet);
+ ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
- ich_force_enable_hpet);
+ ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
- ich_force_enable_hpet);
+ ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1,
- ich_force_enable_hpet);
+ ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
- ich_force_enable_hpet);
+ ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
- ich_force_enable_hpet);
+ ich_force_enable_hpet);
static struct pci_dev *cached_dev;
@@ -232,10 +237,140 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev)
printk(KERN_DEBUG "Failed to force enable HPET\n");
}
+/*
+ * Undocumented chipset features. Make sure that the user explicitly
+ * asked for this.
+ */
+static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
+{
+ if (hpet_force_user)
+ old_ich_force_enable_hpet(dev);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
+ old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
+ old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
+ old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12,
+ old_ich_force_enable_hpet_user);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0,
- old_ich_force_enable_hpet);
+ old_ich_force_enable_hpet);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12,
- old_ich_force_enable_hpet);
+ old_ich_force_enable_hpet);
+
+
+static void vt8237_force_hpet_resume(void)
+{
+ u32 val;
+
+ if (!force_hpet_address || !cached_dev)
+ return;
+
+ val = 0xfed00000 | 0x80;
+ pci_write_config_dword(cached_dev, 0x68, val);
+
+ pci_read_config_dword(cached_dev, 0x68, &val);
+ if (val & 0x80)
+ printk(KERN_DEBUG "Force enabled HPET at resume\n");
+ else
+ BUG();
+}
+
+static void vt8237_force_enable_hpet(struct pci_dev *dev)
+{
+ u32 uninitialized_var(val);
+
+ if (!hpet_force_user || hpet_address || force_hpet_address)
+ return;
+
+ pci_read_config_dword(dev, 0x68, &val);
+ /*
+ * Bit 7 is the HPET enable bit.
+ * Bits 31:10 are the HPET base address (contrary to what the datasheet claims).
+ */
+ if (val & 0x80) {
+ force_hpet_address = (val & ~0x3ff);
+ printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+ force_hpet_address);
+ return;
+ }
+
+ /*
+ * HPET is disabled. Try enabling it at FED00000 and check
+ * whether it sticks.
+ */
+ val = 0xfed00000 | 0x80;
+ pci_write_config_dword(dev, 0x68, val);
+
+ pci_read_config_dword(dev, 0x68, &val);
+ if (val & 0x80) {
+ force_hpet_address = (val & ~0x3ff);
+ printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+ force_hpet_address);
+ cached_dev = dev;
+ force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
+ return;
+ }
+
+ printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
+ vt8237_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
+ vt8237_force_enable_hpet);
+
+/*
+ * Undocumented chipset feature taken from LinuxBIOS.
+ */
+static void nvidia_force_hpet_resume(void)
+{
+ pci_write_config_dword(cached_dev, 0x44, 0xfed00001);
+ printk(KERN_DEBUG "Force enabled HPET at resume\n");
+}
+
+static void nvidia_force_enable_hpet(struct pci_dev *dev)
+{
+ u32 uninitialized_var(val);
+
+ if (!hpet_force_user || hpet_address || force_hpet_address)
+ return;
+
+ pci_write_config_dword(dev, 0x44, 0xfed00001);
+ pci_read_config_dword(dev, 0x44, &val);
+ force_hpet_address = val & 0xfffffffe;
+ force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME;
+ printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+ force_hpet_address);
+ cached_dev = dev;
+ return;
+}
+
+/* ISA Bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0050,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051,
+ nvidia_force_enable_hpet);
+
+/* LPC bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0362,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0363,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0364,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0365,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0366,
+ nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
+ nvidia_force_enable_hpet);
void force_hpet_resume(void)
{
@@ -246,6 +381,12 @@ void force_hpet_resume(void)
case OLD_ICH_FORCE_HPET_RESUME:
return old_ich_force_hpet_resume();
+ case VT8237_FORCE_HPET_RESUME:
+ return vt8237_force_hpet_resume();
+
+ case NVIDIA_FORCE_HPET_RESUME:
+ return nvidia_force_hpet_resume();
+
default:
break;
}
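In the VT8237 quirk above, bit 7 of config register 0x68 is the enable bit and bits 31:10 carry the HPET base address, so the base is recovered by clearing the low ten bits. A stand-alone arithmetic check of that decoding (example value only):

	#include <stdio.h>

	int main(void)
	{
		unsigned int val = 0xfed00000u | 0x80u;	/* base FED00000 with the enable bit set */

		if (val & 0x80u)	/* bit 7: HPET enabled */
			printf("HPET base: 0x%08x\n", val & ~0x3ffu);	/* keep bits 31:10 */
		return 0;
	}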
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
index 368db2b9c5a..71b13c5f581 100644
--- a/arch/x86/kernel/reboot_64.c
+++ b/arch/x86/kernel/reboot_64.c
@@ -11,12 +11,13 @@
#include <linux/sched.h>
#include <asm/io.h>
#include <asm/delay.h>
+#include <asm/desc.h>
#include <asm/hw_irq.h>
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
-#include <asm/iommu.h>
+#include <asm/gart.h>
/*
* Power off function, if any
@@ -136,7 +137,7 @@ void machine_emergency_restart(void)
}
case BOOT_TRIPLE:
- __asm__ __volatile__("lidt (%0)": :"r" (&no_idt));
+ load_idt((const struct desc_ptr *)&no_idt);
__asm__ __volatile__("int3");
reboot_type = BOOT_KBD;
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 8b30b26ad06..1a07bbea7be 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,6 +12,7 @@
#include <linux/interrupt.h>
#include <asm/reboot_fixups.h>
#include <asm/msr.h>
+#include <asm/geode.h>
static void cs5530a_warm_reset(struct pci_dev *dev)
{
@@ -24,11 +25,8 @@ static void cs5530a_warm_reset(struct pci_dev *dev)
static void cs5536_warm_reset(struct pci_dev *dev)
{
- /*
- * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET)
- * writing 1 to the LSB of this MSR causes a hard reset.
- */
- wrmsrl(0x51400017, 1ULL);
+ /* writing 1 to the LSB of this MSR causes a hard reset */
+ wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL);
udelay(50); /* shouldn't get here but be safe and spin a while */
}
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index 1200aaac403..3558ac78c92 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -11,7 +11,6 @@
#include <linux/bootmem.h>
#include <linux/bitops.h>
#include <linux/module.h>
-#include <asm/bootsetup.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -23,8 +22,9 @@
#include <asm/percpu.h>
#include <asm/proto.h>
#include <asm/sections.h>
+#include <asm/setup.h>
-char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
+struct boot_params __initdata boot_params;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
@@ -185,6 +185,12 @@ void __cpuinit check_efer(void)
unsigned long kernel_eflags;
/*
+ * Copies of the original ist values from the tss are only accessed during
+ * debugging, no special alignment required.
+ */
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
+
+/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
@@ -224,8 +230,8 @@ void __cpuinit cpu_init (void)
memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
cpu_gdt_descr[cpu].size = GDT_SIZE;
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
+ load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
+ load_idt((const struct desc_ptr *)&idt_descr);
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index c8e1bc38d42..e1e18c34c82 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -60,6 +60,7 @@
#include <asm/vmi.h>
#include <setup_arch.h>
#include <bios_ebda.h>
+#include <asm/cacheflush.h>
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
@@ -73,6 +74,7 @@ int disable_pse __devinitdata = 0;
*/
extern struct resource code_resource;
extern struct resource data_resource;
+extern struct resource bss_resource;
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -84,9 +86,6 @@ unsigned long mmu_cr4_features;
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
-#ifdef CONFIG_MCA
-EXPORT_SYMBOL(machine_id);
-#endif
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;
@@ -137,10 +136,11 @@ EXPORT_SYMBOL(edd);
*/
static inline void copy_edd(void)
{
- memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
- memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
- edd.mbr_signature_nr = EDD_MBR_SIG_NR;
- edd.edd_info_nr = EDD_NR;
+ memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
+ sizeof(edd.mbr_signature));
+ memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
+ edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
+ edd.edd_info_nr = boot_params.eddbuf_entries;
}
#else
static inline void copy_edd(void)
@@ -377,6 +377,49 @@ extern unsigned long __init setup_memory(void);
extern void zone_sizes_init(void);
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+static inline unsigned long long get_total_mem(void)
+{
+ unsigned long long total;
+
+ total = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+ total += highend_pfn - highstart_pfn;
+#endif
+
+ return total << PAGE_SHIFT;
+}
+
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long total_mem;
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ total_mem = get_total_mem();
+
+ ret = parse_crashkernel(boot_command_line, total_mem,
+ &crash_size, &crash_base);
+ if (ret == 0 && crash_size > 0) {
+ if (crash_base > 0) {
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ reserve_bootmem(crash_base, crash_size);
+ } else
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "you have to specify a base address\n");
+ }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
void __init setup_bootmem_allocator(void)
{
unsigned long bootmap_size;
@@ -434,26 +477,25 @@ void __init setup_bootmem_allocator(void)
#endif
numa_kva_reserve();
#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
- reserve_bootmem(INITRD_START, INITRD_SIZE);
- initrd_start = INITRD_START + PAGE_OFFSET;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+
+ if (ramdisk_end <= end_of_lowmem) {
+ reserve_bootmem(ramdisk_image, ramdisk_size);
+ initrd_start = ramdisk_image + PAGE_OFFSET;
+ initrd_end = initrd_start+ramdisk_size;
+ } else {
printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- INITRD_START + INITRD_SIZE,
- max_low_pfn << PAGE_SHIFT);
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ ramdisk_end, end_of_lowmem);
initrd_start = 0;
}
}
#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end)
- reserve_bootmem(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
-#endif
+ reserve_crashkernel();
}
/*
@@ -512,28 +554,29 @@ void __init setup_arch(char **cmdline_p)
* the system table is valid. If not, then initialize normally.
*/
#ifdef CONFIG_EFI
- if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
+ if ((boot_params.hdr.type_of_loader == 0x50) &&
+ boot_params.efi_info.efi_systab)
efi_enabled = 1;
#endif
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
- screen_info = SCREEN_INFO;
- edid_info = EDID_INFO;
- apm_info.bios = APM_BIOS_INFO;
- ist_info = IST_INFO;
- saved_videomode = VIDEO_MODE;
- if( SYS_DESC_TABLE.length != 0 ) {
- set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
- machine_id = SYS_DESC_TABLE.table[0];
- machine_submodel_id = SYS_DESC_TABLE.table[1];
- BIOS_revision = SYS_DESC_TABLE.table[2];
+ ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
+ screen_info = boot_params.screen_info;
+ edid_info = boot_params.edid_info;
+ apm_info.bios = boot_params.apm_bios_info;
+ ist_info = boot_params.ist_info;
+ saved_videomode = boot_params.hdr.vid_mode;
+ if( boot_params.sys_desc_table.length != 0 ) {
+ set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
+ machine_id = boot_params.sys_desc_table.table[0];
+ machine_submodel_id = boot_params.sys_desc_table.table[1];
+ BIOS_revision = boot_params.sys_desc_table.table[2];
}
- bootloader_type = LOADER_TYPE;
+ bootloader_type = boot_params.hdr.type_of_loader;
#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+ rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
ARCH_SETUP
if (efi_enabled)
@@ -545,7 +588,7 @@ void __init setup_arch(char **cmdline_p)
copy_edd();
- if (!MOUNT_ROOT_RDONLY)
+ if (!boot_params.hdr.root_flags)
root_mountflags &= ~MS_RDONLY;
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
@@ -556,6 +599,8 @@ void __init setup_arch(char **cmdline_p)
code_resource.end = virt_to_phys(_etext)-1;
data_resource.start = virt_to_phys(_etext);
data_resource.end = virt_to_phys(_edata)-1;
+ bss_resource.start = virt_to_phys(&__bss_start);
+ bss_resource.end = virt_to_phys(&__bss_stop)-1;
parse_early_param();
@@ -580,7 +625,7 @@ void __init setup_arch(char **cmdline_p)
/*
* NOTE: before this point _nobody_ is allowed to allocate
* any memory using the bootmem allocator. Although the
- * alloctor is now initialised only the first 8Mb of the kernel
+ * allocator is now initialised only the first 8Mb of the kernel
* virtual address space has been mapped. All allocations before
* paging_init() has completed must use the alloc_bootmem_low_pages()
* variant (which allocates DMA'able memory) and care must be taken
@@ -617,9 +662,7 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_PCI
-#ifdef CONFIG_X86_IO_APIC
- check_acpi_pci(); /* Checks more than just ACPI actually */
-#endif
+ early_quirks();
#endif
#ifdef CONFIG_ACPI
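reserve_crashkernel() above takes the size and base returned by parse_crashkernel() for the crashkernel= command-line option and reserves that range with reserve_bootmem(). The real parser accepts several forms; a simplified stand-alone sketch of the common size@base case (illustrative only):

	#include <stdio.h>

	int main(void)
	{
		const char *arg = "crashkernel=64M@16M";	/* example command-line option */
		unsigned long size_mb = 0, base_mb = 0;

		if (sscanf(arg, "crashkernel=%luM@%luM", &size_mb, &base_mb) == 2)
			printf("reserve %luMB at %luMB for the crash kernel\n",
			       size_mb, base_mb);
		return 0;
	}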
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index b7da90e79c7..238633d3d09 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -52,13 +52,13 @@
#include <asm/dma.h>
#include <asm/mpspec.h>
#include <asm/mmu_context.h>
-#include <asm/bootsetup.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/mach_apic.h>
#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/dmi.h>
+#include <asm/cacheflush.h>
/*
* Machine setup..
@@ -134,6 +134,12 @@ struct resource code_resource = {
.end = 0,
.flags = IORESOURCE_RAM,
};
+struct resource bss_resource = {
+ .name = "Kernel bss",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_RAM,
+};
#ifdef CONFIG_PROC_VMCORE
/* elfcorehdr= specifies the location of elf core header
@@ -180,10 +186,11 @@ EXPORT_SYMBOL(edd);
*/
static inline void copy_edd(void)
{
- memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
- memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
- edd.mbr_signature_nr = EDD_MBR_SIG_NR;
- edd.edd_info_nr = EDD_NR;
+ memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
+ sizeof(edd.mbr_signature));
+ memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
+ edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
+ edd.edd_info_nr = boot_params.eddbuf_entries;
}
#else
static inline void copy_edd(void)
@@ -191,6 +198,37 @@ static inline void copy_edd(void)
}
#endif
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long free_mem;
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+ ret = parse_crashkernel(boot_command_line, free_mem,
+ &crash_size, &crash_base);
+ if (ret == 0 && crash_size) {
+ if (crash_base > 0) {
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(free_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ reserve_bootmem(crash_base, crash_size);
+ } else
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "you have to specify a base address\n");
+ }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
#define EBDA_ADDR_POINTER 0x40E
unsigned __initdata ebda_addr;
@@ -220,21 +258,21 @@ void __init setup_arch(char **cmdline_p)
{
printk(KERN_INFO "Command line: %s\n", boot_command_line);
- ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
- screen_info = SCREEN_INFO;
- edid_info = EDID_INFO;
- saved_video_mode = SAVED_VIDEO_MODE;
- bootloader_type = LOADER_TYPE;
+ ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
+ screen_info = boot_params.screen_info;
+ edid_info = boot_params.edid_info;
+ saved_video_mode = boot_params.hdr.vid_mode;
+ bootloader_type = boot_params.hdr.type_of_loader;
#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+ rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
#endif
setup_memory_region();
copy_edd();
- if (!MOUNT_ROOT_RDONLY)
+ if (!boot_params.hdr.root_flags)
root_mountflags &= ~MS_RDONLY;
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
@@ -245,6 +283,8 @@ void __init setup_arch(char **cmdline_p)
code_resource.end = virt_to_phys(&_etext)-1;
data_resource.start = virt_to_phys(&_etext);
data_resource.end = virt_to_phys(&_edata)-1;
+ bss_resource.start = virt_to_phys(&__bss_start);
+ bss_resource.end = virt_to_phys(&__bss_stop)-1;
early_identify_cpu(&boot_cpu_data);
@@ -271,6 +311,11 @@ void __init setup_arch(char **cmdline_p)
dmi_scan_machine();
+#ifdef CONFIG_SMP
+ /* setup to use the static apicid table during kernel startup */
+ x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
+#endif
+
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -339,28 +384,25 @@ void __init setup_arch(char **cmdline_p)
*/
find_smp_config();
#ifdef CONFIG_BLK_DEV_INITRD
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
- reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
- initrd_start = INITRD_START + PAGE_OFFSET;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
+
+ if (ramdisk_end <= end_of_mem) {
+ reserve_bootmem_generic(ramdisk_image, ramdisk_size);
+ initrd_start = ramdisk_image + PAGE_OFFSET;
+ initrd_end = initrd_start+ramdisk_size;
+ } else {
printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- (unsigned long)(INITRD_START + INITRD_SIZE),
- (unsigned long)(end_pfn << PAGE_SHIFT));
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ ramdisk_end, end_of_mem);
initrd_start = 0;
}
}
#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end) {
- reserve_bootmem_generic(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
- }
-#endif
-
+ reserve_crashkernel();
paging_init();
#ifdef CONFIG_PCI
@@ -526,7 +568,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
but in the same order as the HT nodeids.
If that doesn't result in a usable node fall back to the
path for the previous case. */
- int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
+ int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
if (ht_nodeid >= 0 &&
apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
node = apicid_to_node[ht_nodeid];
@@ -601,7 +643,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
level = cpuid_eax(1);
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- if (c->x86 == 0x10)
+ if (c->x86 == 0x10 || c->x86 == 0x11)
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
/* Enable workaround for FXSAVE leak */
@@ -850,6 +892,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_SMP
c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+ c->cpu_index = 0;
#endif
}
@@ -956,6 +999,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
static int show_cpuinfo(struct seq_file *m, void *v)
{
struct cpuinfo_x86 *c = v;
+ int cpu = 0;
/*
* These flag bits must match the definitions in <asm/cpufeature.h>.
@@ -965,7 +1009,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
* applications want to get the raw CPUID data, they should access
* /dev/cpu/<cpu_nr>/cpuid instead.
*/
- static char *x86_cap_flags[] = {
+ static const char *const x86_cap_flags[] = {
/* Intel-defined */
"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
@@ -996,7 +1040,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* Intel-defined (#2) */
"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
+ NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* VIA/Cyrix/Centaur-defined */
@@ -1006,10 +1050,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
- "altmovcr8", "abm", "sse4a",
- "misalignsse", "3dnowprefetch",
- "osvw", "ibs", NULL, NULL, NULL, NULL,
+ "lahf_lm", "cmp_legacy", "svm", "extapic",
+ "cr8_legacy", "abm", "sse4a", "misalignsse",
+ "3dnowprefetch", "osvw", "ibs", "sse5",
+ "skinit", "wdt", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1019,7 +1063,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
- static char *x86_power_flags[] = {
+ static const char *const x86_power_flags[] = {
"ts", /* temperature sensor */
"fid", /* frequency id control */
"vid", /* voltage id control */
@@ -1034,8 +1078,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#ifdef CONFIG_SMP
- if (!cpu_online(c-cpu_data))
+ if (!cpu_online(c->cpu_index))
return 0;
+ cpu = c->cpu_index;
#endif
seq_printf(m,"processor\t: %u\n"
@@ -1043,7 +1088,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"cpu family\t: %d\n"
"model\t\t: %d\n"
"model name\t: %s\n",
- (unsigned)(c-cpu_data),
+ (unsigned)cpu,
c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
c->x86,
(int)c->x86_model,
@@ -1055,7 +1100,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "stepping\t: unknown\n");
if (cpu_has(c,X86_FEATURE_TSC)) {
- unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
+ unsigned int freq = cpufreq_quick_get((unsigned)cpu);
if (!freq)
freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
@@ -1068,9 +1113,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#ifdef CONFIG_SMP
if (smp_num_siblings * c->x86_max_cores > 1) {
- int cpu = c - cpu_data;
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
+ seq_printf(m, "siblings\t: %d\n",
+ cpus_weight(per_cpu(cpu_core_map, cpu)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
}
@@ -1125,12 +1170,16 @@ static int show_cpuinfo(struct seq_file *m, void *v)
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+ if (*pos == 0) /* just in case, cpu 0 is not the first */
+ *pos = first_cpu(cpu_possible_map);
+ if ((*pos) < NR_CPUS && cpu_possible(*pos))
+ return &cpu_data(*pos);
+ return NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
- ++*pos;
+ *pos = next_cpu(*pos, cpu_possible_map);
return c_start(m, pos);
}
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d01d51fcce2..9bdd83022f5 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -200,8 +200,8 @@ badframe:
if (show_unhandled_signals && printk_ratelimit())
printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
" esp:%lx oeax:%lx\n",
- current->pid > 1 ? KERN_INFO : KERN_EMERG,
- current->comm, current->pid, frame, regs->eip,
+ task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+ current->comm, task_pid_nr(current), frame, regs->eip,
regs->esp, regs->orig_eax);
force_sig(SIGSEGV, current);
@@ -385,7 +385,6 @@ static int setup_frame(int sig, struct k_sigaction *ka,
regs->edx = (unsigned long) 0;
regs->ecx = (unsigned long) 0;
- set_fs(USER_DS);
regs->xds = __USER_DS;
regs->xes = __USER_DS;
regs->xss = __USER_DS;
@@ -479,7 +478,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->edx = (unsigned long) &frame->info;
regs->ecx = (unsigned long) &frame->uc;
- set_fs(USER_DS);
regs->xds = __USER_DS;
regs->xes = __USER_DS;
regs->xss = __USER_DS;
@@ -596,7 +594,7 @@ static void fastcall do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /* Reenable any watchpoints before delivering the
+ /* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 683802bec41..ab086b0357f 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -410,7 +410,7 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /* Reenable any watchpoints before delivering the
+ /* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index 2d35d850202..fcaa026eb80 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -69,7 +69,7 @@
*
* B stepping CPUs may hang. There are hardware work arounds
* for this. We warn about it in case your board doesn't have the work
- * arounds. Basically thats so I can tell anyone with a B stepping
+ * arounds. Basically that's so I can tell anyone with a B stepping
* CPU and SMP problems "tough".
*
* Specific items [From Pentium Processor Specification Update]
@@ -273,7 +273,7 @@ void leave_mm(unsigned long cpu)
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superflous
+ * for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1a2) set cpu_tlbstate to TLBSTATE_OK
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
@@ -342,6 +342,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
smp_mb__after_clear_bit();
out:
put_cpu_no_resched();
+ __get_cpu_var(irq_stat).irq_tlb_count++;
}
void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@ -609,7 +610,7 @@ static void stop_this_cpu (void * dummy)
*/
cpu_clear(smp_processor_id(), cpu_online_map);
disable_local_APIC();
- if (cpu_data[smp_processor_id()].hlt_works_ok)
+ if (cpu_data(smp_processor_id()).hlt_works_ok)
for(;;) halt();
for (;;);
}
@@ -640,6 +641,7 @@ static void native_smp_send_stop(void)
fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
+ __get_cpu_var(irq_stat).irq_resched_count++;
}
fastcall void smp_call_function_interrupt(struct pt_regs *regs)
@@ -660,6 +662,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
*/
irq_enter();
(*func)(info);
+ __get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
if (wait) {
@@ -673,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id)
int i;
for (i = 0; i < NR_CPUS; i++) {
- if (x86_cpu_to_apicid[i] == apic_id)
+ if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
return i;
}
return -1;
@@ -705,3 +708,4 @@ struct smp_ops smp_ops = {
.smp_send_reschedule = native_smp_send_reschedule,
.smp_call_function_mask = native_smp_call_function_mask,
};
+EXPORT_SYMBOL_GPL(smp_ops);
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index df4a82812ad..03fa6ed559c 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -163,6 +163,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
out:
ack_APIC_irq();
cpu_clear(cpu, f->flush_cpumask);
+ add_pda(irq_tlb_count, 1);
}
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
@@ -321,17 +322,27 @@ void unlock_ipi_call_lock(void)
}
/*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system that are set in the mask.
*/
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
+
+static int
+__smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
{
struct call_data_struct data;
- int cpus = 1;
+ cpumask_t allbutself;
+ int cpus;
+
+ allbutself = cpu_online_map;
+ cpu_clear(smp_processor_id(), allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+
+ if (!cpus)
+ return 0;
data.func = func;
data.info = info;
@@ -342,19 +353,55 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info,
call_data = &data;
wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+ /* Send a message to other CPUs */
+ if (cpus_equal(mask, allbutself))
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ else
+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
/* Wait for response */
while (atomic_read(&data.started) != cpus)
cpu_relax();
if (!wait)
- return;
+ return 0;
while (atomic_read(&data.finished) != cpus)
cpu_relax();
+
+ return 0;
+}
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on. Must not include the current cpu.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
+{
+ int ret;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ spin_lock(&call_lock);
+ ret = __smp_call_function_mask(mask, func, info, wait);
+ spin_unlock(&call_lock);
+ return ret;
}
+EXPORT_SYMBOL(smp_call_function_mask);
/*
* smp_call_function_single - Run a function on a specific CPU
@@ -373,6 +420,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
int nonatomic, int wait)
{
/* prevent preemption and reschedule on another processor */
+ int ret;
int me = get_cpu();
/* Can deadlock when called with interrupts disabled */
@@ -386,51 +434,14 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
return 0;
}
- spin_lock(&call_lock);
- __smp_call_function_single(cpu, func, info, nonatomic, wait);
- spin_unlock(&call_lock);
+ ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+
put_cpu();
- return 0;
+ return ret;
}
EXPORT_SYMBOL(smp_call_function_single);
/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-static void __smp_call_function (void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = num_online_cpus()-1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (!wait)
- return;
-
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-}
-
-/*
* smp_call_function - run a function on all other CPUs.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
@@ -448,10 +459,7 @@ static void __smp_call_function (void (*func) (void *info), void *info,
int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
int wait)
{
- spin_lock(&call_lock);
- __smp_call_function(func,info,nonatomic,wait);
- spin_unlock(&call_lock);
- return 0;
+ return smp_call_function_mask(cpu_online_map, func, info, wait);
}
EXPORT_SYMBOL(smp_call_function);
@@ -478,7 +486,7 @@ void smp_send_stop(void)
/* Don't deadlock on the call lock in panic */
nolock = !spin_trylock(&call_lock);
local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
+ __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
if (!nolock)
spin_unlock(&call_lock);
disable_local_APIC();
@@ -493,6 +501,7 @@ void smp_send_stop(void)
asmlinkage void smp_reschedule_interrupt(void)
{
ack_APIC_irq();
+ add_pda(irq_resched_count, 1);
}
asmlinkage void smp_call_function_interrupt(void)
@@ -514,6 +523,7 @@ asmlinkage void smp_call_function_interrupt(void)
exit_idle();
irq_enter();
(*func)(info);
+ add_pda(irq_call_count, 1);
irq_exit();
if (wait) {
mb();
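With smp_call_function_mask() in place, both smp_call_function_single() and smp_call_function() above become thin wrappers around it. A sketch of an ordinary caller, using hypothetical names (bump_counter, count_on_cpus) that are not part of this patch:

    static void bump_counter(void *info)
    {
            atomic_inc((atomic_t *)info);
    }

    static int count_on_cpus(cpumask_t mask, atomic_t *counter)
    {
            /* not safe with interrupts disabled or from interrupt context,
             * per the kernel-doc above */
            return smp_call_function_mask(mask, bump_counter, counter, 1);
    }

Note that __smp_call_function_mask() ands the mask with allbutself before counting targets, so a mask that happens to contain the calling CPU is quietly trimmed rather than rejected.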
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index e4f61d1c624..ef0f34ede1a 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -67,15 +67,15 @@ int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
-int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
/* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
@@ -89,12 +89,20 @@ EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
+/*
+ * The following static array is used during kernel startup
+ * and the x86_cpu_to_apicid_ptr contains the address of the
+ * array during this time. It is zeroed once the per_cpu
+ * data area is set up.
+ */
+u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
+ { [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_cpu_to_apicid_ptr;
+DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
u8 apicid_2_node[MAX_APICID];
@@ -102,8 +110,8 @@ u8 apicid_2_node[MAX_APICID];
* Trampoline 80x86 program as an array.
*/
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end [];
+extern const unsigned char trampoline_data [];
+extern const unsigned char trampoline_end [];
static unsigned char *trampoline_base;
static int trampoline_exec;
@@ -118,7 +126,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
* has made sure it's suitably aligned.
*/
-static unsigned long __devinit setup_trampoline(void)
+static unsigned long __cpuinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);
@@ -150,9 +158,10 @@ void __init smp_alloc_memory(void)
void __cpuinit smp_store_cpu_info(int id)
{
- struct cpuinfo_x86 *c = cpu_data + id;
+ struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
+ c->cpu_index = id;
if (id!=0)
identify_secondary_cpu(c);
/*
@@ -294,13 +303,13 @@ static int cpucount;
/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
/*
* For perf, we return last level cache shared map.
* And for power savings, we return cpu_core_map
*/
if (sched_mc_power_savings || sched_smt_power_savings)
- return cpu_core_map[cpu];
+ return per_cpu(cpu_core_map, cpu);
else
return c->llc_shared_map;
}
@@ -311,61 +320,61 @@ static cpumask_t cpu_sibling_setup_map;
void __cpuinit set_cpu_sibling_map(int cpu)
{
int i;
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
cpu_set(cpu, cpu_sibling_setup_map);
if (smp_num_siblings > 1) {
for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
- c[cpu].cpu_core_id == c[i].cpu_core_id) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
- cpu_set(i, c[cpu].llc_shared_map);
- cpu_set(cpu, c[i].llc_shared_map);
+ if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+ c->cpu_core_id == cpu_data(i).cpu_core_id) {
+ cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
+ cpu_set(i, c->llc_shared_map);
+ cpu_set(cpu, cpu_data(i).llc_shared_map);
}
}
} else {
- cpu_set(cpu, cpu_sibling_map[cpu]);
+ cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
}
- cpu_set(cpu, c[cpu].llc_shared_map);
+ cpu_set(cpu, c->llc_shared_map);
if (current_cpu_data.x86_max_cores == 1) {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- c[cpu].booted_cores = 1;
+ per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
+ c->booted_cores = 1;
return;
}
for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (cpu_llc_id[cpu] != BAD_APICID &&
- cpu_llc_id[cpu] == cpu_llc_id[i]) {
- cpu_set(i, c[cpu].llc_shared_map);
- cpu_set(cpu, c[i].llc_shared_map);
+ if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+ cpu_set(i, c->llc_shared_map);
+ cpu_set(cpu, cpu_data(i).llc_shared_map);
}
- if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
+ if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
/*
* Does this new cpu bringup a new core?
*/
- if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
*/
- if (first_cpu(cpu_sibling_map[i]) == i)
- c[cpu].booted_cores++;
+ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
+ c->booted_cores++;
/*
* increment the core count for all
* the other cpus in this package
*/
if (i != cpu)
- c[i].booted_cores++;
- } else if (i != cpu && !c[cpu].booted_cores)
- c[cpu].booted_cores = c[i].booted_cores;
+ cpu_data(i).booted_cores++;
+ } else if (i != cpu && !c->booted_cores)
+ c->booted_cores = cpu_data(i).booted_cores;
}
}
}
@@ -412,7 +421,7 @@ static void __cpuinit start_secondary(void *unused)
/*
* We need to hold call_lock, so there is no inconsistency
* between the time smp_call_function() determines number of
- * IPI receipients, and the time when the determination is made
+ * IPI recipients, and the time when the determination is made
* for which cpus receive the IPI. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
@@ -804,7 +813,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
irq_ctx_init(cpu);
- x86_cpu_to_apicid[cpu] = apicid;
+ per_cpu(x86_cpu_to_apicid, cpu) = apicid;
/*
* This grunge runs the startup process for
* the targeted processor.
@@ -844,7 +853,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
/* number CPUs logically, starting from 1 (BSP is 0) */
Dprintk("OK.\n");
printk("CPU%d: ", cpu);
- print_cpu_info(&cpu_data[cpu]);
+ print_cpu_info(&cpu_data(cpu));
Dprintk("CPU has booted.\n");
} else {
boot_error= 1;
@@ -866,7 +875,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
cpucount--;
} else {
- x86_cpu_to_apicid[cpu] = apicid;
+ per_cpu(x86_cpu_to_apicid, cpu) = apicid;
cpu_set(cpu, cpu_present_map);
}
@@ -915,7 +924,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
struct warm_boot_cpu_info info;
int apicid, ret;
- apicid = x86_cpu_to_apicid[cpu];
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
if (apicid == BAD_APICID) {
ret = -ENODEV;
goto exit;
@@ -961,11 +970,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
*/
smp_store_cpu_info(0); /* Final full version of the data */
printk("CPU%d: ", 0);
- print_cpu_info(&cpu_data[0]);
+ print_cpu_info(&cpu_data(0));
boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
boot_cpu_logical_apicid = logical_smp_processor_id();
- x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+ per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid;
current_thread_info()->cpu = 0;
@@ -983,8 +992,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
map_cpu_to_logical_apicid();
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
return;
}
@@ -1008,8 +1017,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
smpboot_clear_io_apic_irqs();
phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ map_cpu_to_logical_apicid();
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
return;
}
@@ -1021,10 +1031,17 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (!max_cpus) {
smp_found_config = 0;
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n");
+ connect_bsp_APIC();
+ setup_local_APIC();
+ }
smpboot_clear_io_apic_irqs();
phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ map_cpu_to_logical_apicid();
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
return;
}
@@ -1076,7 +1093,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
Dprintk("Before bogomips.\n");
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (cpu_isset(cpu, cpu_callout_map))
- bogosum += cpu_data[cpu].loops_per_jiffy;
+ bogosum += cpu_data(cpu).loops_per_jiffy;
printk(KERN_INFO
"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
cpucount+1,
@@ -1102,16 +1119,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
Dprintk("Boot done.\n");
/*
- * construct cpu_sibling_map[], so that we can tell sibling CPUs
+ * construct cpu_sibling_map, so that we can tell sibling CPUs
* efficiently.
*/
for (cpu = 0; cpu < NR_CPUS; cpu++) {
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
}
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
smpboot_setup_io_apic();
@@ -1146,23 +1163,23 @@ void __init native_smp_prepare_boot_cpu(void)
void remove_siblinginfo(int cpu)
{
int sibling;
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
- for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
- cpu_clear(cpu, cpu_core_map[sibling]);
- /*
+ for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+ cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+ /*
* last thread sibling in this cpu core going down
*/
- if (cpus_weight(cpu_sibling_map[cpu]) == 1)
- c[sibling].booted_cores--;
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+ cpu_data(sibling).booted_cores--;
}
- for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[sibling]);
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- c[cpu].phys_proc_id = 0;
- c[cpu].cpu_core_id = 0;
+ for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
+ c->phys_proc_id = 0;
+ c->cpu_core_id = 0;
cpu_clear(cpu, cpu_sibling_setup_map);
}
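Most of the churn in smpboot_32.c is one mechanical conversion: NR_CPUS-sized arrays (cpu_data[], cpu_sibling_map[], cpu_core_map[], x86_cpu_to_apicid[]) become per-CPU variables, and indexed reads become cpu_data(n) / per_cpu(var, n) accessors. A reduced sketch of the new access pattern; the cpu_data() macro shown is an assumption about the matching header change, which is not part of this hunk:

    DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
    #define cpu_data(n)     per_cpu(cpu_info, n)

    /* hypothetical user: sum loops_per_jiffy over the online CPUs */
    static unsigned long sum_loops_per_jiffy(void)
    {
            unsigned long bogosum = 0;
            int cpu;

            for_each_online_cpu(cpu)
                    bogosum += cpu_data(cpu).loops_per_jiffy;

            return bogosum;
    }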
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 720a7d1f886..500670c93d8 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -65,7 +65,7 @@ int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
-u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map __read_mostly;
@@ -84,26 +84,26 @@ cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Set when the idlers are all forked */
int smp_threads_ready;
/* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/*
* Trampoline 80x86 program as an array.
*/
-extern unsigned char trampoline_data[];
-extern unsigned char trampoline_end[];
+extern const unsigned char trampoline_data[];
+extern const unsigned char trampoline_end[];
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -138,9 +138,10 @@ static unsigned long __cpuinit setup_trampoline(void)
static void __cpuinit smp_store_cpu_info(int id)
{
- struct cpuinfo_x86 *c = cpu_data + id;
+ struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
+ c->cpu_index = id;
identify_cpu(c);
print_cpu_info(c);
}
@@ -237,13 +238,13 @@ void __cpuinit smp_callin(void)
/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
- struct cpuinfo_x86 *c = cpu_data + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
/*
* For perf, we return last level cache shared map.
* And for power savings, we return cpu_core_map
*/
if (sched_mc_power_savings || sched_smt_power_savings)
- return cpu_core_map[cpu];
+ return per_cpu(cpu_core_map, cpu);
else
return c->llc_shared_map;
}
@@ -254,61 +255,61 @@ static cpumask_t cpu_sibling_setup_map;
static inline void set_cpu_sibling_map(int cpu)
{
int i;
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
cpu_set(cpu, cpu_sibling_setup_map);
if (smp_num_siblings > 1) {
for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
- c[cpu].cpu_core_id == c[i].cpu_core_id) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
- cpu_set(i, c[cpu].llc_shared_map);
- cpu_set(cpu, c[i].llc_shared_map);
+ if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+ c->cpu_core_id == cpu_data(i).cpu_core_id) {
+ cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
+ cpu_set(i, c->llc_shared_map);
+ cpu_set(cpu, cpu_data(i).llc_shared_map);
}
}
} else {
- cpu_set(cpu, cpu_sibling_map[cpu]);
+ cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
}
- cpu_set(cpu, c[cpu].llc_shared_map);
+ cpu_set(cpu, c->llc_shared_map);
if (current_cpu_data.x86_max_cores == 1) {
- cpu_core_map[cpu] = cpu_sibling_map[cpu];
- c[cpu].booted_cores = 1;
+ per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
+ c->booted_cores = 1;
return;
}
for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (cpu_llc_id[cpu] != BAD_APICID &&
- cpu_llc_id[cpu] == cpu_llc_id[i]) {
- cpu_set(i, c[cpu].llc_shared_map);
- cpu_set(cpu, c[i].llc_shared_map);
+ if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+ cpu_set(i, c->llc_shared_map);
+ cpu_set(cpu, cpu_data(i).llc_shared_map);
}
- if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
+ if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
/*
* Does this new cpu bringup a new core?
*/
- if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
*/
- if (first_cpu(cpu_sibling_map[i]) == i)
- c[cpu].booted_cores++;
+ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
+ c->booted_cores++;
/*
* increment the core count for all
* the other cpus in this package
*/
if (i != cpu)
- c[i].booted_cores++;
- } else if (i != cpu && !c[cpu].booted_cores)
- c[cpu].booted_cores = c[i].booted_cores;
+ cpu_data(i).booted_cores++;
+ } else if (i != cpu && !c->booted_cores)
+ c->booted_cores = cpu_data(i).booted_cores;
}
}
}
@@ -350,7 +351,7 @@ void __cpuinit start_secondary(void)
/*
* We need to hold call_lock, so there is no inconsistency
* between the time smp_call_function() determines number of
- * IPI receipients, and the time when the determination is made
+ * IPI recipients, and the time when the determination is made
* for which cpus receive the IPI in genapic_flat.c. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
@@ -387,7 +388,7 @@ static void inquire_remote_apic(int apicid)
printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
printk("... APIC #%d %s: ", apicid, names[i]);
/*
@@ -694,8 +695,7 @@ do_rest:
clear_node_cpumask(cpu); /* was set by numa_add_cpu */
cpu_clear(cpu, cpu_present_map);
cpu_clear(cpu, cpu_possible_map);
- x86_cpu_to_apicid[cpu] = BAD_APICID;
- x86_cpu_to_log_apicid[cpu] = BAD_APICID;
+ per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
return -EIO;
}
@@ -735,8 +735,8 @@ static __init void disable_smp(void)
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
else
phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -842,6 +842,26 @@ static int __init smp_sanity_check(unsigned max_cpus)
}
/*
+ * Copy the apicids found by MP_processor_info from the initial array to the per cpu
+ * data area. The x86_cpu_to_apicid_init array is then expendable and the
+ * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no
+ * longer available.
+ */
+void __init smp_set_apicids(void)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, cpu_possible_map) {
+ if (per_cpu_offset(cpu))
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ x86_cpu_to_apicid_init[cpu];
+ }
+
+ /* indicate the static array will be going away soon */
+ x86_cpu_to_apicid_ptr = NULL;
+}
+
+/*
* Prepare for SMP bootup. The MP table or ACPI has been read
* earlier. Just do some sanity checking here and enable APIC mode.
*/
@@ -850,6 +870,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
nmi_watchdog_default();
current_cpu_data = boot_cpu_data;
current_thread_info()->cpu = 0; /* needed? */
+ smp_set_apicids();
set_cpu_sibling_map(0);
if (smp_sanity_check(max_cpus) < 0) {
@@ -969,23 +990,23 @@ void __init smp_cpus_done(unsigned int max_cpus)
static void remove_siblinginfo(int cpu)
{
int sibling;
- struct cpuinfo_x86 *c = cpu_data;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
- for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
- cpu_clear(cpu, cpu_core_map[sibling]);
+ for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+ cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
/*
* last thread sibling in this cpu core going down
*/
- if (cpus_weight(cpu_sibling_map[cpu]) == 1)
- c[sibling].booted_cores--;
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
+ cpu_data(sibling).booted_cores--;
}
- for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[sibling]);
- cpus_clear(cpu_sibling_map[cpu]);
- cpus_clear(cpu_core_map[cpu]);
- c[cpu].phys_proc_id = 0;
- c[cpu].cpu_core_id = 0;
+ for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+ cpus_clear(per_cpu(cpu_sibling_map, cpu));
+ cpus_clear(per_cpu(cpu_core_map, cpu));
+ c->phys_proc_id = 0;
+ c->cpu_core_id = 0;
cpu_clear(cpu, cpu_sibling_setup_map);
}
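x86_cpu_to_apicid now lives in per-CPU data, which does not yet exist while the MP table or ACPI tables are parsed. During that window writers go through x86_cpu_to_apicid_ptr into the static __initdata array, and smp_set_apicids() above copies the values across once the per-CPU areas are up. A sketch of the early writer; record_cpu_apicid() is an illustrative name, the real caller being the MP-table parser outside this hunk:

    static void __cpuinit record_cpu_apicid(int cpu, u8 apicid)
    {
            u8 *early_map = x86_cpu_to_apicid_ptr;

            if (early_map)
                    early_map[cpu] = apicid;        /* per-CPU areas not ready yet */
            else
                    per_cpu(x86_cpu_to_apicid, cpu) = apicid;
    }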
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 413e527cdeb..6fa6cf036c7 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -33,7 +33,7 @@ static void save_stack_address(void *data, unsigned long addr)
trace->entries[trace->nr_entries++] = addr;
}
-static struct stacktrace_ops save_stack_ops = {
+static const struct stacktrace_ops save_stack_ops = {
.warning = save_stack_warning,
.warning_symbol = save_stack_warning_symbol,
.stack = save_stack_stack,
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 91c7acc8d99..72f46340159 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -64,7 +64,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
switch (rio_devs[wpeg_num]->type){
case CompatWPEG:
- /* The Compatability Winnipeg controls the 2 legacy buses,
+ /* The Compatibility Winnipeg controls the 2 legacy buses,
* the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
* a PCI-PCI bridge card is used in either slot: total 5 buses.
*/
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 573c0a6e0ac..db284ef44d5 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -19,12 +19,6 @@ extern const void __nosave_begin, __nosave_end;
struct saved_context saved_context;
-unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
-unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
-unsigned long saved_context_r08, saved_context_r09, saved_context_r10, saved_context_r11;
-unsigned long saved_context_r12, saved_context_r13, saved_context_r14, saved_context_r15;
-unsigned long saved_context_eflags;
-
void __save_processor_state(struct saved_context *ctxt)
{
kernel_fpu_begin();
@@ -32,9 +26,9 @@ void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
- asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
- asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
- asm volatile ("str %0" : "=m" (ctxt->tr));
+ store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
+ store_idt((struct desc_ptr *)&ctxt->idt_limit);
+ store_tr(ctxt->tr);
/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
/*
@@ -91,8 +85,9 @@ void __restore_processor_state(struct saved_context *ctxt)
* now restore the descriptor tables to their proper values
* ltr is done i fix_processor_context().
*/
- asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
- asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+ load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
+ load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+
/*
* segment registers
@@ -123,7 +118,7 @@ void fix_processor_context(void)
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
- set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
+ set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9;
@@ -150,8 +145,22 @@ void fix_processor_context(void)
/* Defined in arch/x86_64/kernel/suspend_asm.S */
extern int restore_image(void);
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3;
+
pgd_t *temp_level4_pgt;
+void *relocated_restore_code;
+
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i, j;
@@ -175,7 +184,7 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en
if (paddr >= end)
break;
- pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
+ pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
pe &= __supported_pte_mask;
set_pmd(pmd, __pmd(pe));
}
@@ -183,25 +192,42 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en
return 0;
}
+static int res_kernel_text_pud_init(pud_t *pud, unsigned long start)
+{
+ pmd_t *pmd;
+ unsigned long paddr;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+ set_pud(pud + pud_index(start), __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (paddr = 0; paddr < KERNEL_TEXT_SIZE; pmd++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ pe = __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+
+ return 0;
+}
+
static int set_up_temporary_mappings(void)
{
unsigned long start, end, next;
+ pud_t *pud;
int error;
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!temp_level4_pgt)
return -ENOMEM;
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
/* Set up the direct mapping from scratch */
start = (unsigned long)pfn_to_kaddr(0);
end = (unsigned long)pfn_to_kaddr(end_pfn);
for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
next = start + PGDIR_SIZE;
@@ -212,7 +238,17 @@ static int set_up_temporary_mappings(void)
set_pgd(temp_level4_pgt + pgd_index(start),
mk_kernel_pgd(__pa(pud)));
}
- return 0;
+
+ /* Set up the kernel text mapping from scratch */
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+ error = res_kernel_text_pud_init(pud, __START_KERNEL_map);
+ if (!error)
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ __pgd(__pa(pud) | _PAGE_TABLE));
+
+ return error;
}
int swsusp_arch_resume(void)
@@ -222,6 +258,13 @@ int swsusp_arch_resume(void)
/* We have got enough memory and from now on we cannot recover */
if ((error = set_up_temporary_mappings()))
return error;
+
+ relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+ memcpy(relocated_restore_code, &core_restore_code,
+ &restore_registers - &core_restore_code);
+
restore_image();
return 0;
}
@@ -236,4 +279,43 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
+
+struct restore_data_record {
+ unsigned long jump_address;
+ unsigned long cr3;
+ unsigned long magic;
+};
+
+#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+
+/**
+ * arch_hibernation_header_save - populate the architecture specific part
+ * of a hibernation image header
+ * @addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (max_size < sizeof(struct restore_data_record))
+ return -EOVERFLOW;
+ rdr->jump_address = restore_jump_address;
+ rdr->cr3 = restore_cr3;
+ rdr->magic = RESTORE_MAGIC;
+ return 0;
+}
+
+/**
+ * arch_hibernation_header_restore - read the architecture specific data
+ * from the hibernation image header
+ * @addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+ struct restore_data_record *rdr = addr;
+
+ restore_jump_address = rdr->jump_address;
+ restore_cr3 = rdr->cr3;
+ return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S
index 16d183f67bc..72f952103e5 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/kernel/suspend_asm_64.S
@@ -2,8 +2,8 @@
*
* Distribute under GPLv2.
*
- * swsusp_arch_resume may not use any stack, nor any variable that is
- * not "NoSave" during copying pages:
+ * swsusp_arch_resume must not use any stack or any nonlocal variables while
+ * copying pages:
*
* Its rewriting one kernel image with another. What is stack in "old"
* image could very well be data page in "new" image, and overwriting
@@ -17,24 +17,31 @@
#include <asm/asm-offsets.h>
ENTRY(swsusp_arch_suspend)
+ movq $saved_context, %rax
+ movq %rsp, pt_regs_rsp(%rax)
+ movq %rbp, pt_regs_rbp(%rax)
+ movq %rsi, pt_regs_rsi(%rax)
+ movq %rdi, pt_regs_rdi(%rax)
+ movq %rbx, pt_regs_rbx(%rax)
+ movq %rcx, pt_regs_rcx(%rax)
+ movq %rdx, pt_regs_rdx(%rax)
+ movq %r8, pt_regs_r8(%rax)
+ movq %r9, pt_regs_r9(%rax)
+ movq %r10, pt_regs_r10(%rax)
+ movq %r11, pt_regs_r11(%rax)
+ movq %r12, pt_regs_r12(%rax)
+ movq %r13, pt_regs_r13(%rax)
+ movq %r14, pt_regs_r14(%rax)
+ movq %r15, pt_regs_r15(%rax)
+ pushfq
+ popq pt_regs_eflags(%rax)
- movq %rsp, saved_context_esp(%rip)
- movq %rax, saved_context_eax(%rip)
- movq %rbx, saved_context_ebx(%rip)
- movq %rcx, saved_context_ecx(%rip)
- movq %rdx, saved_context_edx(%rip)
- movq %rbp, saved_context_ebp(%rip)
- movq %rsi, saved_context_esi(%rip)
- movq %rdi, saved_context_edi(%rip)
- movq %r8, saved_context_r08(%rip)
- movq %r9, saved_context_r09(%rip)
- movq %r10, saved_context_r10(%rip)
- movq %r11, saved_context_r11(%rip)
- movq %r12, saved_context_r12(%rip)
- movq %r13, saved_context_r13(%rip)
- movq %r14, saved_context_r14(%rip)
- movq %r15, saved_context_r15(%rip)
- pushfq ; popq saved_context_eflags(%rip)
+ /* save the address of restore_registers */
+ movq $restore_registers, %rax
+ movq %rax, restore_jump_address(%rip)
+ /* save cr3 */
+ movq %cr3, %rax
+ movq %rax, restore_cr3(%rip)
call swsusp_save
ret
@@ -54,7 +61,17 @@ ENTRY(restore_image)
movq %rcx, %cr3;
movq %rax, %cr4; # turn PGE back on
+ /* prepare to jump to the image kernel */
+ movq restore_jump_address(%rip), %rax
+ movq restore_cr3(%rip), %rbx
+
+ /* prepare to copy image data to their original locations */
movq restore_pblist(%rip), %rdx
+ movq relocated_restore_code(%rip), %rcx
+ jmpq *%rcx
+
+ /* code below has been relocated to a safe page */
+ENTRY(core_restore_code)
loop:
testq %rdx, %rdx
jz done
@@ -62,7 +79,7 @@ loop:
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
movq pbe_orig_address(%rdx), %rdi
- movq $512, %rcx
+ movq $(PAGE_SIZE >> 3), %rcx
rep
movsq
@@ -70,10 +87,22 @@ loop:
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* jump to the restore_registers address from the image header */
+ jmpq *%rax
+ /*
+ * NOTE: This assumes that the boot kernel's text mapping covers the
+ * image kernel's page containing restore_registers and the address of
+ * this page is the same as in the image kernel's text mapping (it
+ * should always be true, because the text mapping is linear, starting
+ * from 0, and is supposed to cover the entire kernel text for every
+ * kernel).
+ *
+ * code below belongs to the image kernel
+ */
+
+ENTRY(restore_registers)
/* go back to the original page tables */
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
- addq phys_base(%rip), %rax
- movq %rax, %cr3
+ movq %rbx, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
@@ -84,27 +113,29 @@ done:
movq %rcx, %cr3
movq %rax, %cr4; # turn PGE back on
- movl $24, %eax
- movl %eax, %ds
-
- movq saved_context_esp(%rip), %rsp
- movq saved_context_ebp(%rip), %rbp
- /* Don't restore %rax, it must be 0 anyway */
- movq saved_context_ebx(%rip), %rbx
- movq saved_context_ecx(%rip), %rcx
- movq saved_context_edx(%rip), %rdx
- movq saved_context_esi(%rip), %rsi
- movq saved_context_edi(%rip), %rdi
- movq saved_context_r08(%rip), %r8
- movq saved_context_r09(%rip), %r9
- movq saved_context_r10(%rip), %r10
- movq saved_context_r11(%rip), %r11
- movq saved_context_r12(%rip), %r12
- movq saved_context_r13(%rip), %r13
- movq saved_context_r14(%rip), %r14
- movq saved_context_r15(%rip), %r15
- pushq saved_context_eflags(%rip) ; popfq
+ /* We don't restore %rax, it must be 0 anyway */
+ movq $saved_context, %rax
+ movq pt_regs_rsp(%rax), %rsp
+ movq pt_regs_rbp(%rax), %rbp
+ movq pt_regs_rsi(%rax), %rsi
+ movq pt_regs_rdi(%rax), %rdi
+ movq pt_regs_rbx(%rax), %rbx
+ movq pt_regs_rcx(%rax), %rcx
+ movq pt_regs_rdx(%rax), %rdx
+ movq pt_regs_r8(%rax), %r8
+ movq pt_regs_r9(%rax), %r9
+ movq pt_regs_r10(%rax), %r10
+ movq pt_regs_r11(%rax), %r11
+ movq pt_regs_r12(%rax), %r12
+ movq pt_regs_r13(%rax), %r13
+ movq pt_regs_r14(%rax), %r14
+ movq pt_regs_r15(%rax), %r15
+ pushq pt_regs_eflags(%rax)
+ popfq
xorq %rax, %rax
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
ret
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index f8bae9ba032..a86d26f036e 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -17,10 +17,10 @@
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/utsname.h>
+#include <linux/ipc.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
-#include <asm/ipc.h>
/*
* sys_pipe() is the normal C calling standard for creating
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index e3f2569b2c4..9e540fee700 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -40,9 +40,9 @@ static inline void flush_tce(void* tceaddr)
{
/* a single tce can't cross a cache line */
if (cpu_has_clflush)
- asm volatile("clflush (%0)" :: "r" (tceaddr));
+ clflush(tceaddr);
else
- asm volatile("wbinvd":::"memory");
+ wbinvd();
}
void tce_build(struct iommu_table *tbl, unsigned long index,
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index c25f23eb397..8caa0b77746 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -44,15 +44,15 @@ int arch_register_cpu(int num)
* Also certain PCI quirks require not to enable hotplug control
* for all CPU's.
*/
- if (num && enable_cpu_hotplug)
+#ifdef CONFIG_HOTPLUG_CPU
+ if (num)
cpu_devices[num].cpu.hotpluggable = 1;
+#endif
return register_cpu(&cpu_devices[num].cpu, num);
}
#ifdef CONFIG_HOTPLUG_CPU
-int enable_cpu_hotplug = 1;
-
void arch_unregister_cpu(int num) {
return unregister_cpu(&cpu_devices[num].cpu);
}
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index f62815f8d06..9bcc1c6aca3 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -36,11 +36,11 @@
#include <asm/segment.h>
#include <asm/page.h>
-.data
-
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
.section ".init.data","aw",@progbits
+#else
+.section .rodata,"a",@progbits
#endif
.code16
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 607983b0d27..e30b67c6a9f 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -33,7 +33,12 @@
#include <asm/msr.h>
#include <asm/segment.h>
-.data
+/* We can free up trampoline after bootup if cpu hotplug is not supported. */
+#ifndef CONFIG_HOTPLUG_CPU
+.section .init.data, "aw", @progbits
+#else
+.section .rodata, "a", @progbits
+#endif
.code16
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 05c27ecaf2a..298d13ed3ab 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -63,6 +63,9 @@
int panic_on_unrecovered_nmi;
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */
@@ -112,7 +115,7 @@ struct stack_frame {
static inline unsigned long print_context_stack(struct thread_info *tinfo,
unsigned long *stack, unsigned long ebp,
- struct stacktrace_ops *ops, void *data)
+ const struct stacktrace_ops *ops, void *data)
{
#ifdef CONFIG_FRAME_POINTER
struct stack_frame *frame = (struct stack_frame *)ebp;
@@ -149,7 +152,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack,
- struct stacktrace_ops *ops, void *data)
+ const struct stacktrace_ops *ops, void *data)
{
unsigned long ebp = 0;
@@ -221,7 +224,7 @@ static void print_trace_address(void *data, unsigned long addr)
touch_nmi_watchdog();
}
-static struct stacktrace_ops print_trace_ops = {
+static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
@@ -288,48 +291,24 @@ EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
int i;
- int in_kernel = 1;
- unsigned long esp;
- unsigned short ss, gs;
-
- esp = (unsigned long) (&regs->esp);
- savesegment(ss, ss);
- savesegment(gs, gs);
- if (user_mode_vm(regs)) {
- in_kernel = 0;
- esp = regs->esp;
- ss = regs->xss & 0xffff;
- }
+
print_modules();
- printk(KERN_EMERG "CPU: %d\n"
- KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
- KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
- smp_processor_id(), 0xffff & regs->xcs, regs->eip,
- print_tainted(), regs->eflags, init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
- printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->eax, regs->ebx, regs->ecx, regs->edx);
- printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->esi, regs->edi, regs->ebp, esp);
- printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
+ __show_registers(regs, 0);
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
- TASK_COMM_LEN, current->comm, current->pid,
+ TASK_COMM_LEN, current->comm, task_pid_nr(current),
current_thread_info(), current, task_thread_info(current));
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
*/
- if (in_kernel) {
+ if (!user_mode_vm(regs)) {
u8 *eip;
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
unsigned char c;
printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
printk(KERN_EMERG "Code: ");
@@ -374,11 +353,11 @@ int is_valid_bugaddr(unsigned long eip)
void die(const char * str, struct pt_regs * regs, long err)
{
static struct {
- spinlock_t lock;
+ raw_spinlock_t lock;
u32 lock_owner;
int lock_owner_depth;
} die = {
- .lock = __SPIN_LOCK_UNLOCKED(die.lock),
+ .lock = __RAW_SPIN_LOCK_UNLOCKED,
.lock_owner = -1,
.lock_owner_depth = 0
};
@@ -389,40 +368,34 @@ void die(const char * str, struct pt_regs * regs, long err)
if (die.lock_owner != raw_smp_processor_id()) {
console_verbose();
- spin_lock_irqsave(&die.lock, flags);
+ __raw_spin_lock(&die.lock);
+ raw_local_save_flags(flags);
die.lock_owner = smp_processor_id();
die.lock_owner_depth = 0;
bust_spinlocks(1);
}
else
- local_save_flags(flags);
+ raw_local_save_flags(flags);
if (++die.lock_owner_depth < 3) {
- int nl = 0;
unsigned long esp;
unsigned short ss;
report_bug(regs->eip, regs);
- printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff,
+ ++die_counter);
#ifdef CONFIG_PREEMPT
- printk(KERN_EMERG "PREEMPT ");
- nl = 1;
+ printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
- if (!nl)
- printk(KERN_EMERG);
printk("SMP ");
- nl = 1;
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
- if (!nl)
- printk(KERN_EMERG);
printk("DEBUG_PAGEALLOC");
- nl = 1;
#endif
- if (nl)
- printk("\n");
+ printk("\n");
+
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_no, SIGSEGV) !=
NOTIFY_STOP) {
@@ -446,7 +419,8 @@ void die(const char * str, struct pt_regs * regs, long err)
bust_spinlocks(0);
die.lock_owner = -1;
add_taint(TAINT_DIE);
- spin_unlock_irqrestore(&die.lock, flags);
+ __raw_spin_unlock(&die.lock);
+ raw_local_irq_restore(flags);
if (!regs)
return;
@@ -629,7 +603,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
printk_ratelimit())
printk(KERN_INFO
"%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
- current->comm, current->pid,
+ current->comm, task_pid_nr(current),
regs->eip, regs->esp, error_code);
force_sig(SIGSEGV, current);
@@ -815,6 +789,8 @@ void restart_nmi(void)
#ifdef CONFIG_KPROBES
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
+ trace_hardirqs_fixup();
+
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
@@ -1112,20 +1088,6 @@ asmlinkage void math_emulate(long arg)
#endif /* CONFIG_MATH_EMULATION */
-#ifdef CONFIG_X86_F00F_BUG
-void __init trap_init_f00f_bug(void)
-{
- __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
-
- /*
- * Update the IDT descriptor and reload the IDT so that
- * it uses the read-only mapped virtual address.
- */
- idt_descr.address = fix_to_virt(FIX_F00F_IDT);
- load_idt(&idt_descr);
-}
-#endif
-
/*
* This needs to use 'idt_table' rather than 'idt', and
* thus use the _nonmapped_ version of the IDT, as the
@@ -1163,6 +1125,8 @@ static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
void __init trap_init(void)
{
+ int i;
+
#ifdef CONFIG_EISA
void __iomem *p = ioremap(0x0FFFD9, 4);
if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
@@ -1222,6 +1186,11 @@ void __init trap_init(void)
set_system_gate(SYSCALL_VECTOR,&system_call);
+ /* Reserve all the builtin vectors and the syscall vector. */
+ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+ set_bit(i, used_vectors);
+ set_bit(SYSCALL_VECTOR, used_vectors);
+
/*
* Should be a barrier for any external CPU state.
*/
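trap_init() now marks every CPU-reserved vector and the syscall vector in the new used_vectors bitmap, so later code can see which IDT slots are already taken. An illustrative consumer, not part of this patch:

    static int assign_unused_vector(void)
    {
            unsigned int vector = find_first_zero_bit(used_vectors, NR_VECTORS);

            if (vector >= NR_VECTORS)
                    return -ENOSPC;             /* every vector already claimed */

            set_bit(vector, used_vectors);
            return vector;
    }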
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index bc7116acf8f..4a6bd4965f5 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -201,7 +201,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
#define MSG(txt) ops->warning(data, txt)
/*
- * x86-64 can have upto three kernel stacks:
+ * x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -215,7 +215,7 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
unsigned long *stack,
- struct stacktrace_ops *ops, void *data)
+ const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
@@ -336,7 +336,7 @@ static void print_trace_address(void *data, unsigned long addr)
printk_address(addr);
}
-static struct stacktrace_ops print_trace_ops = {
+static const struct stacktrace_ops print_trace_ops = {
.warning = print_trace_warning,
.warning_symbol = print_trace_warning_symbol,
.stack = print_trace_stack,
@@ -462,7 +462,7 @@ void out_of_line_bug(void)
EXPORT_SYMBOL(out_of_line_bug);
#endif
-static DEFINE_SPINLOCK(die_lock);
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -474,13 +474,13 @@ unsigned __kprobes long oops_begin(void)
oops_enter();
/* racy, but better than risking deadlock. */
- local_irq_save(flags);
+ raw_local_irq_save(flags);
cpu = smp_processor_id();
- if (!spin_trylock(&die_lock)) {
+ if (!__raw_spin_trylock(&die_lock)) {
if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
- spin_lock(&die_lock);
+ __raw_spin_lock(&die_lock);
}
die_nest_count++;
die_owner = cpu;
@@ -494,12 +494,10 @@ void __kprobes oops_end(unsigned long flags)
die_owner = -1;
bust_spinlocks(0);
die_nest_count--;
- if (die_nest_count)
- /* We still own the lock */
- local_irq_restore(flags);
- else
+ if (!die_nest_count)
/* Nest count reaches zero, release the lock. */
- spin_unlock_irqrestore(&die_lock, flags);
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
if (panic_on_oops)
panic("Fatal exception");
oops_exit();
@@ -809,6 +807,8 @@ asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
/* runs on IST stack. */
asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
{
+ trace_hardirqs_fixup();
+
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
return;
}
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index b85ad754f70..9ebc0dab66b 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -59,7 +59,7 @@ int check_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
-/* Accellerators for sched_clock()
+/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
@@ -74,7 +74,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
* And since SC is a constant power of two, we can convert the div
* into a shift.
*
- * We can use khz divisor instead of mhz to keep a better percision, since
+ * We can use khz divisor instead of mhz to keep a better precision, since
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
* (mathieu.desnoyers@polymtl.ca)
*
@@ -131,38 +131,43 @@ unsigned long native_calculate_cpu_khz(void)
{
unsigned long long start, end;
unsigned long count;
- u64 delta64;
+ u64 delta64 = (u64)ULLONG_MAX;
int i;
unsigned long flags;
local_irq_save(flags);
- /* run 3 times to ensure the cache is warm */
+ /* run 3 times to ensure the cache is warm and to get an accurate reading */
for (i = 0; i < 3; i++) {
mach_prepare_counter();
rdtscll(start);
mach_countup(&count);
rdtscll(end);
- }
- /*
- * Error: ECTCNEVERSET
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- if (count <= 1)
- goto err;
- delta64 = end - start;
+ /*
+ * Error: ECTCNEVERSET
+ * The CTC wasn't reliable: we got a hit on the very first read,
+ * or the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+ if (count <= 1)
+ continue;
+
+ /* cpu freq too slow: */
+ if ((end - start) <= CALIBRATE_TIME_MSEC)
+ continue;
+
+ /*
+ * We want the minimum time of all runs in case one of them
+ * is inaccurate due to SMI or other delay
+ */
+ delta64 = min(delta64, (end - start));
+ }
- /* cpu freq too fast: */
+ /* cpu freq too fast (or every run was bad): */
if (delta64 > (1ULL<<32))
goto err;
- /* cpu freq too slow: */
- if (delta64 <= CALIBRATE_TIME_MSEC)
- goto err;
-
delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
do_div(delta64,CALIBRATE_TIME_MSEC);
@@ -181,8 +186,8 @@ int recalibrate_cpu_khz(void)
if (cpu_has_tsc) {
cpu_khz = calculate_cpu_khz();
tsc_khz = cpu_khz;
- cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
+ cpu_data(0).loops_per_jiffy =
+ cpufreq_scale(cpu_data(0).loops_per_jiffy,
cpu_khz_old, cpu_khz);
return 0;
} else
@@ -215,7 +220,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
return 0;
}
ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+ loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
cpu_khz_ref = cpu_khz;
}
@@ -223,7 +228,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
(val == CPUFREQ_RESUMECHANGE)) {
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy =
+ cpu_data(freq->cpu).loops_per_jiffy =
cpufreq_scale(loops_per_jiffy_ref,
ref_freq, freq->new);
@@ -349,10 +354,10 @@ __cpuinit int unsynchronized_tsc(void)
static void __init check_geode_tsc_reliable(void)
{
- unsigned long val;
+ unsigned long res_low, res_high;
- rdmsrl(MSR_GEODE_BUSCONT_CONF0, val);
- if ((val & RTSC_SUSP))
+ rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ if (res_low & RTSC_SUSP)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}
#else
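The cyc2ns comment near the top of this hunk reduces the cycles-to-nanoseconds conversion to one multiply and one shift, using a kHz divisor for better precision. A sketch of the two helpers that comment describes; the names and the 2^10 scale factor follow the comment's constraints but are assumptions here, since the functions themselves are outside this hunk:

    #define CYC2NS_SCALE_FACTOR 10          /* 2^10, so cyc2ns_scale fits 32 bits */

    static unsigned long cyc2ns_scale __read_mostly;

    static inline void set_cyc2ns_scale(unsigned long cpu_khz)
    {
            cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
    }

    static inline unsigned long long cycles_2_ns(unsigned long long cyc)
    {
            return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
    }

The multiply-and-shift replaces a 64-bit divide on every sched_clock() call.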
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 9f22e542c37..9c70af45b42 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -73,13 +73,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
struct cpufreq_freqs *freq = data;
unsigned long *lpj, dummy;
- if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+ if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
return 0;
lpj = &dummy;
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
#ifdef CONFIG_SMP
- lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+ lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#else
lpj = &boot_cpu_data.loops_per_jiffy;
#endif
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 18673e0f193..f02bad68aba 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
unsigned long eip, unsigned len)
{
switch (type) {
- case PARAVIRT_PATCH(irq_disable):
+ case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
return patch_internal(VMI_CALL_DisableInterrupts, len,
insns, eip);
- case PARAVIRT_PATCH(irq_enable):
+ case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
return patch_internal(VMI_CALL_EnableInterrupts, len,
insns, eip);
- case PARAVIRT_PATCH(restore_fl):
+ case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
return patch_internal(VMI_CALL_SetInterruptMask, len,
insns, eip);
- case PARAVIRT_PATCH(save_fl):
+ case PARAVIRT_PATCH(pv_irq_ops.save_fl):
return patch_internal(VMI_CALL_GetInterruptMask, len,
insns, eip);
- case PARAVIRT_PATCH(iret):
+ case PARAVIRT_PATCH(pv_cpu_ops.iret):
return patch_internal(VMI_CALL_IRET, len, insns, eip);
- case PARAVIRT_PATCH(irq_enable_sysexit):
+ case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
default:
break;
@@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
}
#endif
-static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
+static void vmi_enter_lazy_cpu(void)
{
- static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode);
-
- if (!vmi_ops.set_lazy_mode)
- return;
+ paravirt_enter_lazy_cpu();
+ vmi_ops.set_lazy_mode(2);
+}
- /* Modes should never nest or overlap */
- BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE ||
- mode == PARAVIRT_LAZY_FLUSH));
+static void vmi_enter_lazy_mmu(void)
+{
+ paravirt_enter_lazy_mmu();
+ vmi_ops.set_lazy_mode(1);
+}
- if (mode == PARAVIRT_LAZY_FLUSH) {
- vmi_ops.set_lazy_mode(0);
- vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode));
- } else {
- vmi_ops.set_lazy_mode(mode);
- __get_cpu_var(lazy_mode) = mode;
- }
+static void vmi_leave_lazy(void)
+{
+ paravirt_leave_lazy(paravirt_get_lazy_mode());
+ vmi_ops.set_lazy_mode(0);
}
static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -690,9 +688,9 @@ do { \
reloc = call_vrom_long_func(vmi_rom, get_reloc, \
VMI_CALL_##vmicall); \
if (rel->type == VMI_RELOCATION_CALL_REL) \
- paravirt_ops.opname = (void *)rel->eip; \
+ opname = (void *)rel->eip; \
else if (rel->type == VMI_RELOCATION_NOP) \
- paravirt_ops.opname = (void *)vmi_nop; \
+ opname = (void *)vmi_nop; \
else if (rel->type != VMI_RELOCATION_NONE) \
printk(KERN_WARNING "VMI: Unknown relocation " \
"type %d for " #vmicall"\n",\
@@ -712,7 +710,7 @@ do { \
VMI_CALL_##vmicall); \
BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \
if (rel->type == VMI_RELOCATION_CALL_REL) { \
- paravirt_ops.opname = wrapper; \
+ opname = wrapper; \
vmi_ops.cache = (void *)rel->eip; \
} \
} while (0)
@@ -732,11 +730,11 @@ static inline int __init activate_vmi(void)
}
savesegment(cs, kernel_cs);
- paravirt_ops.paravirt_enabled = 1;
- paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
+ pv_info.paravirt_enabled = 1;
+ pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
+ pv_info.name = "vmi";
- paravirt_ops.patch = vmi_patch;
- paravirt_ops.name = "vmi";
+ pv_init_ops.patch = vmi_patch;
/*
* Many of these operations are ABI compatible with VMI.
@@ -754,26 +752,26 @@ static inline int __init activate_vmi(void)
*/
/* CPUID is special, so very special it gets wrapped like a present */
- para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
-
- para_fill(clts, CLTS);
- para_fill(get_debugreg, GetDR);
- para_fill(set_debugreg, SetDR);
- para_fill(read_cr0, GetCR0);
- para_fill(read_cr2, GetCR2);
- para_fill(read_cr3, GetCR3);
- para_fill(read_cr4, GetCR4);
- para_fill(write_cr0, SetCR0);
- para_fill(write_cr2, SetCR2);
- para_fill(write_cr3, SetCR3);
- para_fill(write_cr4, SetCR4);
- para_fill(save_fl, GetInterruptMask);
- para_fill(restore_fl, SetInterruptMask);
- para_fill(irq_disable, DisableInterrupts);
- para_fill(irq_enable, EnableInterrupts);
-
- para_fill(wbinvd, WBINVD);
- para_fill(read_tsc, RDTSC);
+ para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
+
+ para_fill(pv_cpu_ops.clts, CLTS);
+ para_fill(pv_cpu_ops.get_debugreg, GetDR);
+ para_fill(pv_cpu_ops.set_debugreg, SetDR);
+ para_fill(pv_cpu_ops.read_cr0, GetCR0);
+ para_fill(pv_mmu_ops.read_cr2, GetCR2);
+ para_fill(pv_mmu_ops.read_cr3, GetCR3);
+ para_fill(pv_cpu_ops.read_cr4, GetCR4);
+ para_fill(pv_cpu_ops.write_cr0, SetCR0);
+ para_fill(pv_mmu_ops.write_cr2, SetCR2);
+ para_fill(pv_mmu_ops.write_cr3, SetCR3);
+ para_fill(pv_cpu_ops.write_cr4, SetCR4);
+ para_fill(pv_irq_ops.save_fl, GetInterruptMask);
+ para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
+ para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
+ para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
+
+ para_fill(pv_cpu_ops.wbinvd, WBINVD);
+ para_fill(pv_cpu_ops.read_tsc, RDTSC);
/* The following we emulate with trap and emulate for now */
/* paravirt_ops.read_msr = vmi_rdmsr */
@@ -781,29 +779,38 @@ static inline int __init activate_vmi(void)
/* paravirt_ops.rdpmc = vmi_rdpmc */
/* TR interface doesn't pass TR value, wrap */
- para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
+ para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
/* LDT is special, too */
- para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
-
- para_fill(load_gdt, SetGDT);
- para_fill(load_idt, SetIDT);
- para_fill(store_gdt, GetGDT);
- para_fill(store_idt, GetIDT);
- para_fill(store_tr, GetTR);
- paravirt_ops.load_tls = vmi_load_tls;
- para_fill(write_ldt_entry, WriteLDTEntry);
- para_fill(write_gdt_entry, WriteGDTEntry);
- para_fill(write_idt_entry, WriteIDTEntry);
- para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
- para_fill(set_iopl_mask, SetIOPLMask);
- para_fill(io_delay, IODelay);
- para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
+ para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
+
+ para_fill(pv_cpu_ops.load_gdt, SetGDT);
+ para_fill(pv_cpu_ops.load_idt, SetIDT);
+ para_fill(pv_cpu_ops.store_gdt, GetGDT);
+ para_fill(pv_cpu_ops.store_idt, GetIDT);
+ para_fill(pv_cpu_ops.store_tr, GetTR);
+ pv_cpu_ops.load_tls = vmi_load_tls;
+ para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
+ para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
+ para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
+ para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
+ para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
+ para_fill(pv_cpu_ops.io_delay, IODelay);
+
+ para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+ set_lazy_mode, SetLazyMode);
+ para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+ set_lazy_mode, SetLazyMode);
+
+ para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
+ set_lazy_mode, SetLazyMode);
+ para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+ set_lazy_mode, SetLazyMode);
/* user and kernel flush are just handled with different flags to FlushTLB */
- para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
- para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
- para_fill(flush_tlb_single, InvalPage);
+ para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
+ para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
+ para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
/*
* Until a standard flag format can be agreed on, we need to
@@ -819,41 +826,41 @@ static inline int __init activate_vmi(void)
#endif
if (vmi_ops.set_pte) {
- paravirt_ops.set_pte = vmi_set_pte;
- paravirt_ops.set_pte_at = vmi_set_pte_at;
- paravirt_ops.set_pmd = vmi_set_pmd;
+ pv_mmu_ops.set_pte = vmi_set_pte;
+ pv_mmu_ops.set_pte_at = vmi_set_pte_at;
+ pv_mmu_ops.set_pmd = vmi_set_pmd;
#ifdef CONFIG_X86_PAE
- paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
- paravirt_ops.set_pte_present = vmi_set_pte_present;
- paravirt_ops.set_pud = vmi_set_pud;
- paravirt_ops.pte_clear = vmi_pte_clear;
- paravirt_ops.pmd_clear = vmi_pmd_clear;
+ pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
+ pv_mmu_ops.set_pte_present = vmi_set_pte_present;
+ pv_mmu_ops.set_pud = vmi_set_pud;
+ pv_mmu_ops.pte_clear = vmi_pte_clear;
+ pv_mmu_ops.pmd_clear = vmi_pmd_clear;
#endif
}
if (vmi_ops.update_pte) {
- paravirt_ops.pte_update = vmi_update_pte;
- paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+ pv_mmu_ops.pte_update = vmi_update_pte;
+ pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
}
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
if (vmi_ops.allocate_page) {
- paravirt_ops.alloc_pt = vmi_allocate_pt;
- paravirt_ops.alloc_pd = vmi_allocate_pd;
- paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+ pv_mmu_ops.alloc_pt = vmi_allocate_pt;
+ pv_mmu_ops.alloc_pd = vmi_allocate_pd;
+ pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
}
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
if (vmi_ops.release_page) {
- paravirt_ops.release_pt = vmi_release_pt;
- paravirt_ops.release_pd = vmi_release_pd;
+ pv_mmu_ops.release_pt = vmi_release_pt;
+ pv_mmu_ops.release_pd = vmi_release_pd;
}
/* Set linear is needed in all cases */
vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
#ifdef CONFIG_HIGHPTE
if (vmi_ops.set_linear_mapping)
- paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
+ pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
#endif
/*
@@ -863,17 +870,17 @@ static inline int __init activate_vmi(void)
* the backend. They are performance critical anyway, so requiring
* a patch is not a big problem.
*/
- paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0;
- paravirt_ops.iret = (void *)0xbadbab0;
+ pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
+ pv_cpu_ops.iret = (void *)0xbadbab0;
#ifdef CONFIG_SMP
- para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
+ para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- para_fill(apic_read, APICRead);
- para_fill(apic_write, APICWrite);
- para_fill(apic_write_atomic, APICWrite);
+ para_fill(pv_apic_ops.apic_read, APICRead);
+ para_fill(pv_apic_ops.apic_write, APICWrite);
+ para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
#endif
/*
@@ -891,15 +898,15 @@ static inline int __init activate_vmi(void)
vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
vmi_timer_ops.cancel_alarm =
vmi_get_function(VMI_CALL_CancelAlarm);
- paravirt_ops.time_init = vmi_time_init;
- paravirt_ops.get_wallclock = vmi_get_wallclock;
- paravirt_ops.set_wallclock = vmi_set_wallclock;
+ pv_time_ops.time_init = vmi_time_init;
+ pv_time_ops.get_wallclock = vmi_get_wallclock;
+ pv_time_ops.set_wallclock = vmi_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
- paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
- paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
+ pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
+ pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
#endif
- paravirt_ops.sched_clock = vmi_sched_clock;
- paravirt_ops.get_cpu_khz = vmi_cpu_khz;
+ pv_time_ops.sched_clock = vmi_sched_clock;
+ pv_time_ops.get_cpu_khz = vmi_cpu_khz;
/* We have true wallclock functions; disable CMOS clock sync */
no_sync_cmos_clock = 1;
@@ -908,7 +915,7 @@ static inline int __init activate_vmi(void)
disable_vmi_timer = 1;
}
- para_fill(safe_halt, Halt);
+ para_fill(pv_irq_ops.safe_halt, Halt);
/*
* Alternative instruction rewriting doesn't happen soon enough
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 93847d84815..ad4005c6d4a 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -48,12 +48,12 @@
({unsigned long v; \
extern char __vsyscall_0; \
asm("" : "=r" (v) : "0" (x)); \
- ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
+ ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
/*
* vsyscall_gtod_data contains data that is :
* - readonly from vsyscalls
- * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
+ * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
* Try to keep this structure as small as possible to avoid cache line ping pongs
*/
int __vgetcpu_mode __section_vgetcpu_mode;
@@ -64,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
.sysctl_enabled = 1,
};
+void update_vsyscall_tz(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ /* sys_tz has changed */
+ vsyscall_gtod_data.sys_tz = sys_tz;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+
void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
{
unsigned long flags;
@@ -77,8 +87,6 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.sys_tz = sys_tz;
- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
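update_vsyscall_tz() publishes sys_tz under the same seqlock that guards the rest of vsyscall_gtod_data, so the userspace readers in the vsyscall page see either the old or the new timezone but never a torn value; update_vsyscall() correspondingly stops copying sys_tz (and loses its duplicated wall_time_nsec store). A single-threaded sketch of the sequence-count protocol involved — the real code uses seqlock_t with proper barriers and IRQ protection:

#include <stdio.h>

struct gtod {
        unsigned seq;
        int tz_minuteswest;                    /* stands in for sys_tz */
} gtod;

static void write_tz(int minuteswest)          /* cf. update_vsyscall_tz() */
{
        gtod.seq++;                            /* odd: update in progress */
        gtod.tz_minuteswest = minuteswest;
        gtod.seq++;                            /* even: consistent again */
}

static int read_tz(void)                       /* cf. the vsyscall readers */
{
        unsigned seq;
        int tz;

        do {
                seq = gtod.seq;
                tz = gtod.tz_minuteswest;
        } while ((seq & 1) || gtod.seq != seq); /* retry on torn read */
        return tz;
}

int main(void)
{
        write_tz(-60);
        printf("minuteswest = %d\n", read_tz());
        return 0;
}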
@@ -164,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t)
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
return time_syscall(t);
- vgettimeofday(&tv, 0);
+ vgettimeofday(&tv, NULL);
result = tv.tv_sec;
if (t)
*t = result;
@@ -258,18 +266,10 @@ out:
return ret;
}
-static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
static ctl_table kernel_table2[] = {
- { .ctl_name = 99, .procname = "vsyscall64",
+ { .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .strategy = vsyscall_sysctl_nostrat,
.proc_handler = vsyscall_sysctl_change },
{}
};
@@ -289,9 +289,9 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
unsigned long *d;
unsigned long node = 0;
#ifdef CONFIG_NUMA
- node = cpu_to_node[cpu];
+ node = cpu_to_node(cpu);
#endif
- if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
+ if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
write_rdtscp_aux((node << 12) | cpu);
/* Store cpu number in limit so that it can be loaded quickly