aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig38
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c51
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/apic_32.c105
-rw-r--r--arch/x86/kernel/apic_64.c251
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c13
-rw-r--r--arch/x86/kernel/cpu/common_64.c40
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/e820.c28
-rw-r--r--arch/x86/kernel/es7000_32.c (renamed from arch/x86/mach-es7000/es7000plat.c)87
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c70
-rw-r--r--arch/x86/kernel/i387.c154
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c47
-rw-r--r--arch/x86/kernel/io_apic_64.c639
-rw-r--r--arch/x86/kernel/ioport.c1
-rw-r--r--arch/x86/kernel/ipi.c3
-rw-r--r--arch/x86/kernel/irqinit_32.c49
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sigframe.h14
-rw-r--r--arch/x86/kernel/signal_32.c46
-rw-r--r--arch/x86/kernel/signal_64.c96
-rw-r--r--arch/x86/kernel/smpboot.c42
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/sys_i386_32.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c1
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/tls.c1
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/traps_64.c6
-rw-r--r--arch/x86/kernel/tsc.c235
-rw-r--r--arch/x86/kernel/vm86_32.c1
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/kernel/xsave.c316
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/lguest/boot.c38
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/mach-default/setup.c19
-rw-r--r--arch/x86/mach-es7000/Makefile5
-rw-r--r--arch/x86/mach-es7000/es7000.h114
-rw-r--r--arch/x86/mach-generic/Makefile1
-rw-r--r--arch/x86/mach-generic/bigsmp.c9
-rw-r--r--arch/x86/mach-generic/es7000.c13
-rw-r--r--arch/x86/mach-generic/numaq.c12
-rw-r--r--arch/x86/mach-generic/summit.c11
-rw-r--r--arch/x86/mm/fault.c3
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/pci/acpi.c5
-rw-r--r--arch/x86/pci/i386.c90
-rw-r--r--arch/x86/pci/mmconfig-shared.c12
-rw-r--r--arch/x86/power/cpu_32.c7
-rw-r--r--arch/x86/power/cpu_64.c7
-rw-r--r--arch/x86/xen/enlighten.c45
69 files changed, 2521 insertions, 759 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68d91c8233f..21ef9dd3618 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -577,35 +577,29 @@ config SWIOTLB
config IOMMU_HELPER
def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
- depends on X86_64 && SMP
+ depends on X86_64 && SMP && BROKEN
default n
help
Configure maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
-if MAXSMP
config NR_CPUS
- int
- default "4096"
-endif
-
-if !MAXSMP
-config NR_CPUS
- int "Maximum number of CPUs (2-4096)"
- range 2 4096
+ int "Maximum number of CPUs (2-512)" if !MAXSMP
+ range 2 512
depends on SMP
+ default "4096" if MAXSMP
default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
default "8"
help
This allows you to specify the maximum number of CPUs which this
- kernel will support. The maximum supported value is 4096 and the
+ kernel will support. The maximum supported value is 512 and the
minimum value which makes sense is 2.
This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image.
-endif
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
@@ -996,17 +990,10 @@ config NUMA_EMU
into virtual nodes when booted with "numa=fake=N", where N is the
number of nodes. This is only useful for debugging.
-if MAXSMP
-
config NODES_SHIFT
- int
- default "9"
-endif
-
-if !MAXSMP
-config NODES_SHIFT
- int "Maximum NUMA Nodes (as a power of 2)"
+ int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
range 1 9 if X86_64
+ default "9" if MAXSMP
default "6" if X86_64
default "4" if X86_NUMAQ
default "3"
@@ -1014,7 +1001,6 @@ config NODES_SHIFT
help
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accomodate various tables.
-endif
config HAVE_ARCH_BOOTMEM_NODE
def_bool y
@@ -1657,6 +1643,14 @@ config DMAR_FLOPPY_WA
workaround will setup a 1:1 mapping for the first
16M to make floppy (an ISA device) work.
+config INTR_REMAP
+ bool "Support for Interrupt Remapping (EXPERIMENTAL)"
+ depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+ help
+ Supports Interrupt remapping for IO-APIC and MSI devices.
+ To use x2apic mode in the CPU's which support x2APIC enhancements or
+ to support platforms with CPU's having > 8 bit APIC ID, say Y.
+
source "drivers/pci/pcie/Kconfig"
source "drivers/pci/Kconfig"
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 9fea7370647..aaf5a2131ef 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -16,7 +16,7 @@
*/
#undef CONFIG_PARAVIRT
#ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
#endif
#ifdef CONFIG_X86_64
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 20af4c79579..f25a1012400 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -179,9 +179,10 @@ struct sigframe
u32 pretcode;
int sig;
struct sigcontext_ia32 sc;
- struct _fpstate_ia32 fpstate;
+ struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
unsigned int extramask[_COMPAT_NSIG_WORDS-1];
char retcode[8];
+ /* fp state follows here */
};
struct rt_sigframe
@@ -192,8 +193,8 @@ struct rt_sigframe
u32 puc;
compat_siginfo_t info;
struct ucontext_ia32 uc;
- struct _fpstate_ia32 fpstate;
char retcode[8];
+ /* fp state follows here */
};
#define COPY(x) { \
@@ -215,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
unsigned int *peax)
{
unsigned int tmpflags, gs, oldgs, err = 0;
- struct _fpstate_ia32 __user *buf;
+ void __user *buf;
u32 tmp;
/* Always make any pending restarted system calls return -EINTR */
@@ -259,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
err |= __get_user(tmp, &sc->fpstate);
buf = compat_ptr(tmp);
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387_ia32(buf);
- } else {
- struct task_struct *me = current;
-
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
+ err |= restore_i387_xstate_ia32(buf);
err |= __get_user(tmp, &sc->ax);
*peax = tmp;
return err;
-
-badframe:
- return 1;
}
asmlinkage long sys32_sigreturn(struct pt_regs *regs)
@@ -350,7 +337,7 @@ badframe:
*/
static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
- struct _fpstate_ia32 __user *fpstate,
+ void __user *fpstate,
struct pt_regs *regs, unsigned int mask)
{
int tmp, err = 0;
@@ -381,7 +368,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
err |= __put_user((u32)regs->flags, &sc->flags);
err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
- tmp = save_i387_ia32(fpstate);
+ tmp = save_i387_xstate_ia32(fpstate);
if (tmp < 0)
err = -EFAULT;
else {
@@ -402,7 +389,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
* Determine which stack to use..
*/
static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
- size_t frame_size)
+ size_t frame_size,
+ void **fpstate)
{
unsigned long sp;
@@ -421,6 +409,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
ka->sa.sa_restorer)
sp = (unsigned long) ka->sa.sa_restorer;
+ if (used_math()) {
+ sp = sp - sig_xstate_ia32_size;
+ *fpstate = (struct _fpstate_ia32 *) sp;
+ }
+
sp -= frame_size;
/* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0. */
@@ -434,6 +427,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
struct sigframe __user *frame;
void __user *restorer;
int err = 0;
+ void __user *fpstate = NULL;
/* copy_to_user optimizes that into a single 8 byte store */
static const struct {
@@ -448,7 +442,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
0,
};
- frame = get_sigframe(ka, regs, sizeof(*frame));
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
@@ -457,8 +451,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
if (err)
goto give_sigsegv;
- err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs,
- set->sig[0]);
+ err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
if (err)
goto give_sigsegv;
@@ -522,6 +515,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0;
+ void __user *fpstate = NULL;
/* __copy_to_user optimizes that into a single 8 byte store */
static const struct {
@@ -537,7 +531,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
0,
};
- frame = get_sigframe(ka, regs, sizeof(*frame));
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
@@ -550,13 +544,16 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+ err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec..c9be69fedb7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
-obj-y += i387.o
+obj-y += i387.o xsave.o
obj-y += ptrace.o
obj-y += ds.o
obj-$(CONFIG_X86_32) += tls.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
+obj-$(CONFIG_X86_ES7000) += es7000_32.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
obj-y += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
@@ -104,6 +105,8 @@ obj-$(CONFIG_OLPC) += olpc.o
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
obj-y += bios_uv.o
+ obj-y += genx2apic_cluster.o
+ obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bfd10fd211c..27ef365e757 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -775,7 +775,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
set_fixmap_nocache(FIX_APIC_BASE, address);
if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
#ifdef CONFIG_X86_32
apic_version[boot_cpu_physical_apicid] =
GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1351,7 +1351,9 @@ static void __init acpi_process_madt(void)
acpi_ioapic = 1;
smp_found_config = 1;
+#ifdef CONFIG_X86_32
setup_apic_routing();
+#endif
}
}
if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b..58427210505 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
@@ -167,16 +172,48 @@ u32 safe_apic_wait_icr_idle(void)
return send_status;
}
+void xapic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+ apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = xapic_icr_read,
+ .icr_write = xapic_icr_write,
+ .wait_icr_idle = xapic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
void __cpuinit enable_NMI_through_LVT0(void)
{
- unsigned int v = APIC_DM_NMI;
+ unsigned int v;
- /* Level triggered for 82489DX */
+ /* unmask and set to NMI */
+ v = APIC_DM_NMI;
+
+ /* Level triggered for 82489DX (32bit mode) */
if (!lapic_is_integrated())
v |= APIC_LVT_LEVEL_TRIGGER;
+
apic_write(APIC_LVT0, v);
}
@@ -193,9 +230,13 @@ int get_physical_broadcast(void)
*/
int lapic_get_maxlvt(void)
{
- unsigned int v = apic_read(APIC_LVR);
+ unsigned int v;
- /* 82489DXs do not report # of LVT entries. */
+ v = apic_read(APIC_LVR);
+ /*
+ * - we always have APIC integrated on 64bit mode
+ * - 82489DXs do not report # of LVT entries
+ */
return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
}
@@ -1205,7 +1246,7 @@ void __init init_apic_mappings(void)
* default configuration (or the MP table is broken).
*/
if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
@@ -1242,7 +1283,7 @@ int __init APIC_init_uniprocessor(void)
* might be zero if read from MP tables. Get it from LAPIC.
*/
#ifdef CONFIG_CRASH_DUMP
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
#endif
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
@@ -1321,54 +1362,6 @@ void smp_error_interrupt(struct pt_regs *regs)
irq_exit();
}
-#ifdef CONFIG_SMP
-void __init smp_intr_init(void)
-{
- /*
- * IRQ0 must be given a fixed assignment and initialized,
- * because it's used before the IO-APIC is set up.
- */
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPI for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
- /* IPI for generic function call */
- alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
- /* IPI for single call function */
- set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
- call_function_single_interrupt);
-}
-#endif
-
-/*
- * Initialize APIC interrupts
- */
-void __init apic_intr_init(void)
-{
-#ifdef CONFIG_SMP
- smp_intr_init();
-#endif
- /* self generated IPI for local APIC timer */
- alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI vectors for APIC spurious and error interrupts */
- alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
- alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-
- /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
- alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-}
-
/**
* connect_bsp_APIC - attach the APIC to the interrupt system
*/
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831..1a6011855af 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
#include <linux/clockchips.h>
#include <linux/acpi_pmtmr.h>
#include <linux/module.h>
+#include <linux/dmar.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -39,6 +40,7 @@
#include <asm/proto.h>
#include <asm/timex.h>
#include <asm/apic.h>
+#include <asm/i8259.h>
#include <mach_ipi.h>
#include <mach_apic.h>
@@ -46,6 +48,11 @@
static int disable_apic_timer __cpuinitdata;
static int apic_calibrate_pmtmr __initdata;
int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
@@ -118,13 +125,13 @@ static int modern_apic(void)
return lapic_get_version() >= 0x14;
}
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
{
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
{
u32 send_status;
int timeout;
@@ -140,6 +147,69 @@ u32 safe_apic_wait_icr_idle(void)
return send_status;
}
+void xapic_icr_write(u32 low, u32 id)
+{
+ apic_write(APIC_ICR2, id << 24);
+ apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+ u32 icr1, icr2;
+
+ icr2 = apic_read(APIC_ICR2);
+ icr1 = apic_read(APIC_ICR);
+
+ return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+ .read = native_apic_mem_read,
+ .write = native_apic_mem_write,
+ .icr_read = xapic_icr_read,
+ .icr_write = xapic_icr_write,
+ .wait_icr_idle = xapic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+ /* no need to wait for icr idle in x2apic */
+ return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+ wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+ unsigned long val;
+
+ rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+ return val;
+}
+
+static struct apic_ops x2apic_ops = {
+ .read = native_apic_msr_read,
+ .write = native_apic_msr_write,
+ .icr_read = x2apic_icr_read,
+ .icr_write = x2apic_icr_write,
+ .wait_icr_idle = x2apic_wait_icr_idle,
+ .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
/**
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
*/
@@ -149,6 +219,11 @@ void __cpuinit enable_NMI_through_LVT0(void)
/* unmask and set to NMI */
v = APIC_DM_NMI;
+
+ /* Level triggered for 82489DX (32bit mode) */
+ if (!lapic_is_integrated())
+ v |= APIC_LVT_LEVEL_TRIGGER;
+
apic_write(APIC_LVT0, v);
}
@@ -157,11 +232,14 @@ void __cpuinit enable_NMI_through_LVT0(void)
*/
int lapic_get_maxlvt(void)
{
- unsigned int v, maxlvt;
+ unsigned int v;
v = apic_read(APIC_LVR);
- maxlvt = GET_APIC_MAXLVT(v);
- return maxlvt;
+ /*
+ * - we always have APIC integrated on 64bit mode
+ * - 82489DXs do not report # of LVT entries
+ */
+ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
}
/*
@@ -629,10 +707,10 @@ int __init verify_local_APIC(void)
/*
* The ID register is read/write in a real APIC.
*/
- reg0 = read_apic_id();
+ reg0 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = read_apic_id();
+ reg1 = apic_read(APIC_ID);
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -833,6 +911,125 @@ void __cpuinit end_local_APIC_setup(void)
apic_pm_activate();
}
+void check_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+ if (msr & X2APIC_ENABLE) {
+ printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ x2apic_preenabled = x2apic = 1;
+ apic_ops = &x2apic_ops;
+ }
+}
+
+void enable_x2apic(void)
+{
+ int msr, msr2;
+
+ rdmsr(MSR_IA32_APICBASE, msr, msr2);
+ if (!(msr & X2APIC_ENABLE)) {
+ printk("Enabling x2apic\n");
+ wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+ }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+ int ret;
+ unsigned long flags;
+
+ if (!cpu_has_x2apic)
+ return;
+
+ if (!x2apic_preenabled && disable_x2apic) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
+ return;
+ }
+
+ if (x2apic_preenabled && disable_x2apic)
+ panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+ if (!x2apic_preenabled && skip_ioapic_setup) {
+ printk(KERN_INFO
+ "Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
+ return;
+ }
+
+ ret = dmar_table_init();
+ if (ret) {
+ printk(KERN_INFO
+ "dmar_table_init() failed with %d:\n", ret);
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled by bios. But IR enabling failed");
+ else
+ printk(KERN_INFO
+ "Not enabling x2apic,Intr-remapping\n");
+ return;
+ }
+
+ local_irq_save(flags);
+ mask_8259A();
+ save_mask_IO_APIC_setup();
+
+ ret = enable_intr_remapping(1);
+
+ if (ret && x2apic_preenabled) {
+ local_irq_restore(flags);
+ panic("x2apic enabled by bios. But IR enabling failed");
+ }
+
+ if (ret)
+ goto end;
+
+ if (!x2apic) {
+ x2apic = 1;
+ apic_ops = &x2apic_ops;
+ enable_x2apic();
+ }
+end:
+ if (ret)
+ /*
+ * IR enabling failed
+ */
+ restore_IO_APIC_setup();
+ else
+ reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+ unmask_8259A();
+ local_irq_restore(flags);
+
+ if (!ret) {
+ if (!x2apic_preenabled)
+ printk(KERN_INFO
+ "Enabled x2apic and interrupt-remapping\n");
+ else
+ printk(KERN_INFO
+ "Enabled Interrupt-remapping\n");
+ } else
+ printk(KERN_ERR
+ "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+ if (!cpu_has_x2apic)
+ return;
+
+ if (x2apic_preenabled)
+ panic("x2apic enabled prior OS handover,"
+ " enable CONFIG_INTR_REMAP");
+
+ printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
+#endif
+
+ return;
+}
+
/*
* Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser.
@@ -872,7 +1069,7 @@ void __init early_init_lapic_mapping(void)
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
/**
@@ -880,6 +1077,11 @@ void __init early_init_lapic_mapping(void)
*/
void __init init_apic_mappings(void)
{
+ if (x2apic) {
+ boot_cpu_physical_apicid = read_apic_id();
+ return;
+ }
+
/*
* If no local APIC can be found then set up a fake all
* zeroes page to simulate the local APIC and another
@@ -899,7 +1101,7 @@ void __init init_apic_mappings(void)
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+ boot_cpu_physical_apicid = read_apic_id();
}
/*
@@ -918,6 +1120,9 @@ int __init APIC_init_uniprocessor(void)
return -1;
}
+ enable_IR_x2apic();
+ setup_apic_routing();
+
verify_local_APIC();
connect_bsp_APIC();
@@ -1093,6 +1298,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
cpu_set(cpu, cpu_present_map);
}
+int hard_smp_processor_id(void)
+{
+ return read_apic_id();
+}
+
/*
* Power management
*/
@@ -1129,7 +1339,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
maxlvt = lapic_get_maxlvt();
- apic_pm_state.apic_id = read_apic_id();
+ apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1164,10 +1374,14 @@ static int lapic_resume(struct sys_device *dev)
maxlvt = lapic_get_maxlvt();
local_irq_save(flags);
- rdmsr(MSR_IA32_APICBASE, l, h);
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
- wrmsr(MSR_IA32_APICBASE, l, h);
+ if (!x2apic) {
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ } else
+ enable_x2apic();
+
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1307,6 +1521,15 @@ __cpuinit int apic_is_clustered_box(void)
return (clusters > 2);
}
+static __init int setup_nox2apic(char *str)
+{
+ disable_x2apic = 1;
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+ return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
/*
* APIC command line parameters
*/
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006f..505543a75a5 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
#define __NO_STUBS 1
#undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
#define __SYSCALL(nr, sym) [nr] = 1,
static char syscalls[] = {
#include <asm/unistd.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2b2c170e8d6..008c73796bb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -776,9 +776,20 @@ void __cpuinit cpu_init(void)
/*
* Force FPU initialization:
*/
- current_thread_info()->status = 0;
+ if (cpu_has_xsave)
+ current_thread_info()->status = TS_XSAVE;
+ else
+ current_thread_info()->status = 0;
clear_used_math();
mxcsr_feature_mask_init();
+
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (!smp_processor_id())
+ init_thread_xstate();
+
+ xsave_init();
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index b2da0413532..ae007b3521c 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -628,17 +628,20 @@ void pda_init(int cpu)
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
+ pda->irqstackptr += IRQSTACKSIZE - 64;
} else {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d", cpu);
+ if (!pda->irqstackptr) {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d",
+ cpu);
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ }
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
-
- pda->irqstackptr += IRQSTACKSIZE-64;
}
char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
@@ -732,23 +735,28 @@ void __cpuinit cpu_init(void)
barrier();
check_efer();
+ if (cpu != 0 && x2apic)
+ enable_x2apic();
/*
* set up and load the per-CPU TSS
*/
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (!orig_ist->ist[0]) {
static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
};
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception stack %ld %d\n",
- v, cpu);
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (cpu) {
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+ if (!estacks)
+ panic("Cannot allocate exception "
+ "stack %ld %d\n", v, cpu);
+ }
+ estacks += PAGE_SIZE << order[v];
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
+ (unsigned long)estacks;
}
- estacks += PAGE_SIZE << order[v];
- orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
}
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index b117d7f8a56..58ac5d3d436 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
mtrr_type type;
};
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;
static int __init
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9af89078f7b..e24d1bc47b4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -148,6 +148,9 @@ void __init e820_print_map(char *who)
case E820_NVS:
printk(KERN_CONT "(ACPI NVS)\n");
break;
+ case E820_UNUSABLE:
+ printk("(unusable)\n");
+ break;
default:
printk(KERN_CONT "type %u\n", e820.map[i].type);
break;
@@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type)
case E820_RAM: return "System RAM";
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
+ case E820_UNUSABLE: return "Unusable memory";
default: return "reserved";
}
}
@@ -1267,6 +1271,7 @@ static inline const char *e820_type_to_string(int e820_type)
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
+static struct resource __initdata *e820_res;
void __init e820_reserve_resources(void)
{
int i;
@@ -1274,6 +1279,7 @@ void __init e820_reserve_resources(void)
u64 end;
res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+ e820_res = res;
for (i = 0; i < e820.nr_map; i++) {
end = e820.map[i].addr + e820.map[i].size - 1;
#ifndef CONFIG_RESOURCES_64BIT
@@ -1287,7 +1293,14 @@ void __init e820_reserve_resources(void)
res->end = end;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- insert_resource(&iomem_resource, res);
+
+ /*
+ * don't register the region that could be conflicted with
+ * pci device BAR resource and insert them later in
+ * pcibios_resource_survey()
+ */
+ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20))
+ insert_resource(&iomem_resource, res);
res++;
}
@@ -1299,6 +1312,19 @@ void __init e820_reserve_resources(void)
}
}
+void __init e820_reserve_resources_late(void)
+{
+ int i;
+ struct resource *res;
+
+ res = e820_res;
+ for (i = 0; i < e820.nr_map; i++) {
+ if (!res->parent && res->end)
+ reserve_region_with_split(&iomem_resource, res->start, res->end, res->name);
+ res++;
+ }
+}
+
char *__init default_machine_specific_memory_setup(void)
{
char *who = "BIOS-e820";
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/kernel/es7000_32.c
index 50189af14b8..849e5cd485b 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -39,10 +39,93 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/apicdef.h>
-#include "es7000.h"
#include <mach_mpparse.h>
/*
+ * ES7000 chipsets
+ */
+
+#define NON_UNISYS 0
+#define ES7000_CLASSIC 1
+#define ES7000_ZORRO 2
+
+
+#define MIP_REG 1
+#define MIP_PSAI_REG 4
+
+#define MIP_BUSY 1
+#define MIP_SPIN 0xf0000
+#define MIP_VALID 0x0100000000000000ULL
+#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
+
+#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
+
+struct mip_reg_info {
+ unsigned long long mip_info;
+ unsigned long long delivery_info;
+ unsigned long long host_reg;
+ unsigned long long mip_reg;
+};
+
+struct part_info {
+ unsigned char type;
+ unsigned char length;
+ unsigned char part_id;
+ unsigned char apic_mode;
+ unsigned long snum;
+ char ptype[16];
+ char sname[64];
+ char pname[64];
+};
+
+struct psai {
+ unsigned long long entry_type;
+ unsigned long long addr;
+ unsigned long long bep_addr;
+};
+
+struct es7000_mem_info {
+ unsigned char type;
+ unsigned char length;
+ unsigned char resv[6];
+ unsigned long long start;
+ unsigned long long size;
+};
+
+struct es7000_oem_table {
+ unsigned long long hdr;
+ struct mip_reg_info mip;
+ struct part_info pif;
+ struct es7000_mem_info shm;
+ struct psai psai;
+};
+
+#ifdef CONFIG_ACPI
+
+struct oem_table {
+ struct acpi_table_header Header;
+ u32 OEMTableAddr;
+ u32 OEMTableSize;
+};
+
+extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
+#endif
+
+struct mip_reg {
+ unsigned long long off_0;
+ unsigned long long off_8;
+ unsigned long long off_10;
+ unsigned long long off_18;
+ unsigned long long off_20;
+ unsigned long long off_28;
+ unsigned long long off_30;
+ unsigned long long off_38;
+};
+
+#define MIP_SW_APIC 0x1020b
+#define MIP_FUNC(VALUE) (VALUE & 0xff)
+
+/*
* ES7000 Globals
*/
@@ -72,7 +155,7 @@ es7000_rename_gsi(int ioapic, int gsi)
base += nr_ioapic_registers[i];
}
- if (!ioapic && (gsi < 16))
+ if (!ioapic && (gsi < 16))
gsi += base;
return gsi;
}
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb144..6c9bfc9e1e9 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/hardirq.h>
+#include <linux/dmar.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-DEFINE_PER_CPU(int, x2apic_extra_bits);
+extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
+extern struct genapic apic_x2xpic_uv_x;
+extern struct genapic apic_x2apic_phys;
+extern struct genapic apic_x2apic_cluster;
struct genapic __read_mostly *genapic = &apic_flat;
-static enum uv_system_type uv_system_type;
+static struct genapic *apic_probe[] __initdata = {
+ &apic_x2apic_uv_x,
+ &apic_x2apic_phys,
+ &apic_x2apic_cluster,
+ &apic_physflat,
+ NULL,
+};
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
void __init setup_apic_routing(void)
{
- if (uv_system_type == UV_NON_UNIQUE_APIC)
- genapic = &apic_x2apic_uv_x;
- else
-#ifdef CONFIG_ACPI
- /*
- * Quirk: some x86_64 machines can only use physical APIC mode
- * regardless of how many processors are present (x86_64 ES7000
- * is an example).
- */
- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
- genapic = &apic_physflat;
- else
-#endif
-
- if (max_physical_apicid < 8)
- genapic = &apic_flat;
- else
- genapic = &apic_physflat;
+ if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
+ if (!intr_remapping_enabled)
+ genapic = &apic_flat;
+ }
- printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+ if (genapic == &apic_flat) {
+ if (max_physical_apicid >= 8)
+ genapic = &apic_physflat;
+ printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+ }
}
/* Same for both flat and physical. */
-void send_IPI_self(int vector)
+void apic_send_IPI_self(int vector)
{
__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- if (!strcmp(oem_id, "SGI")) {
- if (!strcmp(oem_table_id, "UVL"))
- uv_system_type = UV_LEGACY_APIC;
- else if (!strcmp(oem_table_id, "UVX"))
- uv_system_type = UV_X2APIC;
- else if (!strcmp(oem_table_id, "UVH"))
- uv_system_type = UV_NON_UNIQUE_APIC;
+ int i;
+
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+ genapic = apic_probe[i];
+ printk(KERN_INFO "Setting APIC routing to %s.\n",
+ genapic->name);
+ return 1;
+ }
}
return 0;
}
-
-unsigned int read_apic_id(void)
-{
- unsigned int id;
-
- WARN_ON(preemptible() && num_online_cpus() > 1);
- id = apic_read(APIC_ID);
- if (uv_system_type >= UV_X2APIC)
- id |= __get_cpu_var(x2apic_extra_bits);
- return id;
-}
-
-enum uv_system_type get_uv_system_type(void)
-{
- return uv_system_type;
-}
-
-int is_uv_system(void)
-{
- return uv_system_type != UV_NONE;
-}
-EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d3..9eca5ba7a6b 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
+#include <linux/hardirq.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
+#include <mach_apicdef.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return 1;
+}
static cpumask_t flat_target_cpus(void)
{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = (((x)>>24) & 0xFFu);
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = ((id & 0xFFu)<<24);
+ return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+ unsigned int id;
+
+ id = get_apic_id(apic_read(APIC_ID));
+ return id;
+}
+
static int flat_apic_id_registered(void)
{
- return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
}
static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
struct genapic apic_flat = {
.name = "flat",
+ .acpi_madt_oem_check = flat_acpi_madt_oem_check,
.int_delivery_mode = dest_LowestPrio,
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
.target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_mask = flat_send_IPI_mask,
+ .send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFu<<24),
};
/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
* We cannot use logical delivery in this case because the mask
* overflows, so use physical mode.
*/
+static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+ /*
+ * Quirk: some x86_64 machines can only use physical APIC mode
+ * regardless of how many processors are present (x86_64 ES7000
+ * is an example).
+ */
+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+ return 1;
+#endif
+
+ return 0;
+}
static cpumask_t physflat_target_cpus(void)
{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
struct genapic apic_physflat = {
.name = "physical flat",
+ .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_mask = physflat_send_IPI_mask,
+ .send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFu<<24),
};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 00000000000..fed9f68efd6
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (cpu_has_x2apic)
+ return 1;
+
+ return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+ unsigned int dest)
+{
+ unsigned long cfg;
+
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * send the IPI.
+ */
+ x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+
+ local_irq_save(flags);
+ for_each_cpu_mask(query_cpu, mask) {
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = x;
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = id;
+ return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+ return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ return x2apic_read_id() >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+ return;
+}
+
+struct genapic apic_x2apic_cluster = {
+ .name = "cluster x2apic",
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .int_delivery_mode = dest_LowestPrio,
+ .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+ .target_cpus = x2apic_target_cpus,
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .apic_id_registered = x2apic_apic_id_registered,
+ .init_apic_ldr = init_x2apic_ldr,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_self = x2apic_send_IPI_self,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 00000000000..958d537b4cc
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+static int x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+ x2apic_phys = 1;
+ return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (cpu_has_x2apic && x2apic_phys)
+ return 1;
+
+ return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+ unsigned int dest)
+{
+ unsigned long cfg;
+
+ cfg = __prepare_ICR(0, vector, dest);
+
+ /*
+ * send the IPI.
+ */
+ x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+
+ local_irq_save(flags);
+ for_each_cpu_mask(query_cpu, mask) {
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+ x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ id = x;
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ x = id;
+ return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+ return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ return x2apic_read_id() >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+
+void init_x2apic_ldr(void)
+{
+ return;
+}
+
+struct genapic apic_x2apic_phys = {
+ .name = "physical x2apic",
+ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .int_delivery_mode = dest_Fixed,
+ .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+ .target_cpus = x2apic_target_cpus,
+ .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .apic_id_registered = x2apic_apic_id_registered,
+ .init_apic_ldr = init_x2apic_ldr,
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_self = x2apic_send_IPI_self,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16b..ae2ffc8a400 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
-#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/bootmem.h>
#include <linux/module.h>
+#include <linux/hardirq.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
@@ -26,6 +26,36 @@
#include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+static enum uv_system_type uv_system_type;
+
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strcmp(oem_id, "SGI")) {
+ if (!strcmp(oem_table_id, "UVL"))
+ uv_system_type = UV_LEGACY_APIC;
+ else if (!strcmp(oem_table_id, "UVX"))
+ uv_system_type = UV_X2APIC;
+ else if (!strcmp(oem_table_id, "UVH")) {
+ uv_system_type = UV_NON_UNIQUE_APIC;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+ return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+ return uv_system_type != UV_NONE;
+}
+EXPORT_SYMBOL_GPL(is_uv_system);
+
DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
@@ -123,6 +153,10 @@ static int uv_apic_id_registered(void)
return 1;
}
+static void uv_init_apic_ldr(void)
+{
+}
+
static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
{
int cpu;
@@ -138,9 +172,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
return BAD_APICID;
}
+static unsigned int get_apic_id(unsigned long x)
+{
+ unsigned int id;
+
+ WARN_ON(preemptible() && num_online_cpus() > 1);
+ id = x | __get_cpu_var(x2apic_extra_bits);
+
+ return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+ unsigned long x;
+
+ /* maskout x2apic_extra_bits ? */
+ x = id;
+ return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+ return get_apic_id(apic_read(APIC_ID));
+}
+
static unsigned int phys_pkg_id(int index_msb)
{
- return GET_APIC_ID(read_apic_id()) >> index_msb;
+ return uv_read_apic_id() >> index_msb;
}
#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +211,22 @@ static void uv_send_IPI_self(int vector)
struct genapic apic_x2apic_uv_x = {
.name = "UV large system",
+ .acpi_madt_oem_check = uv_acpi_madt_oem_check,
.int_delivery_mode = dest_Fixed,
.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
.target_cpus = uv_target_cpus,
.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
.apic_id_registered = uv_apic_id_registered,
+ .init_apic_ldr = uv_init_apic_ldr,
.send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask,
/* ZZZ.send_IPI_self = uv_send_IPI_self, */
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
+ .get_apic_id = get_apic_id,
+ .set_apic_id = set_apic_id,
+ .apic_id_mask = (0xFFFFFFFFu),
};
static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +465,5 @@ void __cpuinit uv_cpu_init(void)
if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
set_x2apic_extra_bits(uv_hub_info->pnode);
}
+
+
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb8..45723f1fe19 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
# include <asm/sigcontext32.h>
# include <asm/user32.h>
#else
-# define save_i387_ia32 save_i387
-# define restore_i387_ia32 restore_i387
+# define save_i387_xstate_ia32 save_i387_xstate
+# define restore_i387_xstate_ia32 restore_i387_xstate
# define _fpstate_ia32 _fpstate
+# define _xstate_ia32 _xstate
+# define sig_xstate_ia32_size sig_xstate_size
+# define fx_sw_reserved_ia32 fx_sw_reserved
# define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct
#endif
@@ -36,6 +39,7 @@
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
+unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
return;
}
+ if (cpu_has_xsave) {
+ xsave_cntxt_init();
+ return;
+ }
+
if (cpu_has_fxsr)
xstate_size = sizeof(struct i387_fxsave_struct);
#ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (!smp_processor_id())
+ init_thread_xstate();
+ xsave_init();
+
mxcsr_feature_mask_init();
/* clean state in init */
- current_thread_info()->status = 0;
+ if (cpu_has_xsave)
+ current_thread_info()->status = TS_XSAVE;
+ else
+ current_thread_info()->status = 0;
clear_used_math();
}
#endif /* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
*/
target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
+ /*
+ * update the header bits in the xsave header, indicating the
+ * presence of FP and SSE state.
+ */
+ if (cpu_has_xsave)
+ target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
return ret;
}
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!ret)
convert_to_fxsr(target, &env);
+ /*
+ * update the header bit in the xsave header, indicating the
+ * presence of FP.
+ */
+ if (cpu_has_xsave)
+ target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
return ret;
}
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
struct task_struct *tsk = current;
struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
- unlazy_fpu(tsk);
fp->status = fp->swd;
if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
struct user_i387_ia32_struct env;
int err = 0;
- unlazy_fpu(tsk);
-
convert_from_fxsr(&env, tsk);
if (__copy_to_user(buf, &env, sizeof(env)))
return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
if (err)
return -1;
- if (__copy_to_user(&buf->_fxsr_env[0], fx,
- sizeof(struct i387_fxsave_struct)))
+ if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
return -1;
return 1;
}
-int save_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int save_i387_xsave(void __user *buf)
+{
+ struct _fpstate_ia32 __user *fx = buf;
+ int err = 0;
+
+ if (save_i387_fxsave(fx) < 0)
+ return -1;
+
+ err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
+ sizeof(struct _fpx_sw_bytes));
+ err |= __put_user(FP_XSTATE_MAGIC2,
+ (__u32 __user *) (buf + sig_xstate_ia32_size
+ - FP_XSTATE_MAGIC2_SIZE));
+ if (err)
+ return -1;
+
+ return 1;
+}
+
+int save_i387_xstate_ia32(void __user *buf)
{
+ struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
+ struct task_struct *tsk = current;
+
if (!used_math())
return 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
+ return -EACCES;
/*
* This will cause a "finit" to be triggered by the next
* attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
if (!HAVE_HWFP) {
return fpregs_soft_get(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) ? -1 : 1;
+ NULL, fp) ? -1 : 1;
}
+ unlazy_fpu(tsk);
+
+ if (cpu_has_xsave)
+ return save_i387_xsave(fp);
if (cpu_has_fxsr)
- return save_i387_fxsave(buf);
+ return save_i387_fxsave(fp);
else
- return save_i387_fsave(buf);
+ return save_i387_fsave(fp);
}
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
sizeof(struct i387_fsave_struct));
}
-static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
+static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
+ unsigned int size)
{
struct task_struct *tsk = current;
struct user_i387_ia32_struct env;
int err;
err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
- sizeof(struct i387_fxsave_struct));
+ size);
/* mxcsr reserved bits must be masked to zero for security reasons */
tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
return 0;
}
-int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int restore_i387_xsave(void __user *buf)
+{
+ struct _fpx_sw_bytes fx_sw_user;
+ struct _fpstate_ia32 __user *fx_user =
+ ((struct _fpstate_ia32 __user *) buf);
+ struct i387_fxsave_struct __user *fx =
+ (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
+ struct xsave_hdr_struct *xsave_hdr =
+ &current->thread.xstate->xsave.xsave_hdr;
+ u64 mask;
+ int err;
+
+ if (check_for_xstate(fx, buf, &fx_sw_user))
+ goto fx_only;
+
+ mask = fx_sw_user.xstate_bv;
+
+ err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
+
+ xsave_hdr->xstate_bv &= pcntxt_mask;
+ /*
+ * These bits must be zero.
+ */
+ xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+
+ /*
+ * Init the state that is not present in the memory layout
+ * and enabled by the OS.
+ */
+ mask = ~(pcntxt_mask & ~mask);
+ xsave_hdr->xstate_bv &= mask;
+
+ return err;
+fx_only:
+ /*
+ * Couldn't find the extended state information in the memory
+ * layout. Restore the FP/SSE and init the other extended state
+ * enabled by the OS.
+ */
+ xsave_hdr->xstate_bv = XSTATE_FPSSE;
+ return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
+}
+
+int restore_i387_xstate_ia32(void __user *buf)
{
int err;
struct task_struct *tsk = current;
+ struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
if (HAVE_HWFP)
clear_fpu(tsk);
+ if (!buf) {
+ if (used_math()) {
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+
+ return 0;
+ } else
+ if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
+ return -EACCES;
+
if (!used_math()) {
err = init_fpu(tsk);
if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
}
if (HAVE_HWFP) {
- if (cpu_has_fxsr)
- err = restore_i387_fxsave(buf);
+ if (cpu_has_xsave)
+ err = restore_i387_xsave(buf);
+ else if (cpu_has_fxsr)
+ err = restore_i387_fxsave(fp, sizeof(struct
+ i387_fxsave_struct));
else
- err = restore_i387_fsave(buf);
+ err = restore_i387_fsave(fp);
} else {
err = fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
- NULL, buf) != 0;
+ NULL, fp) != 0;
}
set_used_math();
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d920..4b8a53d841f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
device_initcall(i8259A_init_sysfs);
+void mask_8259A(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
+ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+
+ outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
+
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
void init_8259A(int auto_eoi)
{
unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec..e710289f673 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,10 +46,13 @@
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
+#include <asm/setup.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
+#define __apicdebuginit(type) static type __init
+
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
@@ -1341,7 +1344,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
ioapic_write_entry(apic, pin, entry);
}
-void __init print_IO_APIC(void)
+
+__apicdebuginit(void) print_IO_APIC(void)
{
int apic, i;
union IO_APIC_reg_00 reg_00;
@@ -1456,9 +1460,7 @@ void __init print_IO_APIC(void)
return;
}
-#if 0
-
-static void print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_bitfield(int base)
{
unsigned int v;
int i, j;
@@ -1479,9 +1481,10 @@ static void print_APIC_bitfield(int base)
}
}
-void /*__init*/ print_local_APIC(void *dummy)
+__apicdebuginit(void) print_local_APIC(void *dummy)
{
unsigned int v, ver, maxlvt;
+ u64 icr;
if (apic_verbosity == APIC_QUIET)
return;
@@ -1490,7 +1493,7 @@ void /*__init*/ print_local_APIC(void *dummy)
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
- GET_APIC_ID(read_apic_id()));
+ GET_APIC_ID(v));
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
@@ -1532,10 +1535,9 @@ void /*__init*/ print_local_APIC(void *dummy)
printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
}
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+ icr = apic_icr_read();
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
v = apic_read(APIC_LVTT);
printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1563,12 +1565,12 @@ void /*__init*/ print_local_APIC(void *dummy)
printk("\n");
}
-void print_all_local_APICs(void)
+__apicdebuginit(void) print_all_local_APICs(void)
{
on_each_cpu(print_local_APIC, NULL, 1);
}
-void /*__init*/ print_PIC(void)
+__apicdebuginit(void) print_PIC(void)
{
unsigned int v;
unsigned long flags;
@@ -1600,7 +1602,17 @@ void /*__init*/ print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
-#endif /* 0 */
+__apicdebuginit(int) print_all_ICs(void)
+{
+ print_PIC();
+ print_all_local_APICs();
+ print_IO_APIC();
+
+ return 0;
+}
+
+fs_initcall(print_all_ICs);
+
static void __init enable_IO_APIC(void)
{
@@ -1698,8 +1710,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest.physical.physical_dest =
- GET_APIC_ID(read_apic_id());
+ entry.dest.physical.physical_dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1736,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
-#ifdef CONFIG_X86_NUMAQ
- if (found_numaq)
+ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
return;
-#endif
/*
* Don't check I/O APIC IDs for xAPIC systems. They have
@@ -2329,8 +2338,6 @@ void __init setup_IO_APIC(void)
setup_IO_APIC_irqs();
init_IO_APIC_traps();
check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
}
/*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18..a1bec2969c6 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
#include <acpi/acpi_bus.h>
#endif
#include <linux/bootmem.h>
+#include <linux/dmar.h>
#include <asm/idle.h>
#include <asm/io.h>
@@ -49,10 +50,13 @@
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
#include <mach_ipi.h>
#include <mach_apic.h>
+#define __apicdebuginit(type) static type __init
+
struct irq_cfg {
cpumask_t domain;
cpumask_t old_domain;
@@ -87,8 +91,6 @@ int first_system_vector = 0xfe;
char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-#define __apicdebuginit __init
-
int sis_apic_bug; /* not actually supported, dummy for compile */
static int no_timer_check;
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
*/
int nr_ioapic_registers[MAX_IO_APICS];
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
/* I/O APIC entries */
struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
pin = entry->pin;
if (pin == -1)
break;
- io_apic_write(apic, 0x11 + pin*2, dest);
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin);
}
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+ union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+ int apic, pin;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+ */
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ reg_01.raw = io_apic_read(apic, 1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ early_ioapic_entries[apic] =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ nr_ioapic_registers[apic], GFP_KERNEL);
+ if (!early_ioapic_entries[apic])
+ return -ENOMEM;
+ }
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = early_ioapic_entries[apic][pin] =
+ ioapic_read_entry(apic, pin);
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ }
+ }
+ return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+ int apic, pin;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ ioapic_write_entry(apic, pin,
+ early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+ /*
+ * for now plain restore of previous settings.
+ * TBD: In the case of OS enabling interrupt-remapping,
+ * IO-APIC RTE's need to be setup to point to interrupt-remapping
+ * table entries. for now, do a plain restore, and wait for
+ * the setup_IO_APIC_irqs() to do proper initialization.
+ */
+ restore_IO_APIC_setup();
+}
+
int skip_ioapic_setup;
int ioapic_force;
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
}
static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
static void ioapic_register_intr(int irq, unsigned long trigger)
{
- if (trigger) {
+ if (trigger)
irq_desc[irq].status |= IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq, "fasteoi");
- } else {
+ else
irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+ handle_edge_irq, "edge");
+ return;
+ }
+#endif
+ if (trigger)
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_fasteoi_irq,
+ "fasteoi");
+ else
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector)
+{
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+ struct irte irte;
+ struct IR_IO_APIC_route_entry *ir_entry =
+ (struct IR_IO_APIC_route_entry *) entry;
+ int index;
+
+ if (!iommu)
+ panic("No mapping iommu for ioapic %d\n", apic);
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+ memset(&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = trigger;
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = vector;
+ irte.dest_id = IRTE_DEST(destination);
+
+ modify_irte(irq, &irte);
+
+ ir_entry->index2 = (index >> 15) & 0x1;
+ ir_entry->zero = 0;
+ ir_entry->format = 1;
+ ir_entry->index = (index & 0x7fff);
+ } else
+#endif
+ {
+ entry->delivery_mode = INT_DELIVERY_MODE;
+ entry->dest_mode = INT_DEST_MODE;
+ entry->dest = destination;
}
+
+ entry->mask = 0; /* enable IRQ */
+ entry->trigger = trigger;
+ entry->polarity = polarity;
+ entry->vector = vector;
+
+ /* Mask level triggered irqs.
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+ */
+ if (trigger)
+ entry->mask = 1;
+ return 0;
}
static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
irq, trigger, polarity);
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
-
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.dest = cpu_mask_to_apicid(mask);
- entry.mask = 0; /* enable IRQ */
- entry.trigger = trigger;
- entry.polarity = polarity;
- entry.vector = cfg->vector;
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (trigger)
- entry.mask = 1;
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+ cpu_mask_to_apicid(mask), trigger, polarity,
+ cfg->vector)) {
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mp_ioapics[apic].mp_apicid, pin);
+ __clear_irq_vector(irq);
+ return;
+ }
ioapic_register_intr(irq, trigger);
if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
{
struct IO_APIC_route_entry entry;
+ if (intr_remapping_enabled)
+ return;
+
memset(&entry, 0, sizeof(entry));
/*
@@ -970,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
ioapic_write_entry(apic, pin, entry);
}
-void __apicdebuginit print_IO_APIC(void)
+
+__apicdebuginit(void) print_IO_APIC(void)
{
int apic, i;
union IO_APIC_reg_00 reg_00;
@@ -1064,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void)
return;
}
-#if 0
-
-static __apicdebuginit void print_APIC_bitfield (int base)
+__apicdebuginit(void) print_APIC_bitfield(int base)
{
unsigned int v;
int i, j;
@@ -1087,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
}
}
-void __apicdebuginit print_local_APIC(void * dummy)
+__apicdebuginit(void) print_local_APIC(void *dummy)
{
unsigned int v, ver, maxlvt;
+ unsigned long icr;
if (apic_verbosity == APIC_QUIET)
return;
@@ -1097,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
@@ -1133,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
v = apic_read(APIC_ESR);
printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
- v = apic_read(APIC_ICR);
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
- v = apic_read(APIC_ICR2);
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+ icr = apic_icr_read();
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
v = apic_read(APIC_LVTT);
printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
printk("\n");
}
-void print_all_local_APICs (void)
+__apicdebuginit(void) print_all_local_APICs(void)
{
on_each_cpu(print_local_APIC, NULL, 1);
}
-void __apicdebuginit print_PIC(void)
+__apicdebuginit(void) print_PIC(void)
{
unsigned int v;
unsigned long flags;
@@ -1201,7 +1347,17 @@ void __apicdebuginit print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
-#endif /* 0 */
+__apicdebuginit(int) print_all_ICs(void)
+{
+ print_PIC();
+ print_all_local_APICs();
+ print_IO_APIC();
+
+ return 0;
+}
+
+fs_initcall(print_all_ICs);
+
void __init enable_IO_APIC(void)
{
@@ -1291,7 +1447,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest = GET_APIC_ID(read_apic_id());
+ entry.dest = read_apic_id();
/*
* Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
*/
#ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_desc *desc = irq_desc + irq;
+ cpumask_t tmp, cleanup_mask;
+ struct irte irte;
+ int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+ unsigned int dest;
+ unsigned long flags;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ if (modify_ioapic_rte) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __target_IO_APIC_irq(irq, dest, cfg->vector);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * Modified the IRTE and flushes the Interrupt entry cache.
+ */
+ modify_irte(irq, &irte);
+
+ if (cfg->move_in_progress) {
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+ irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+ int ret = -1;
+
+ mask_IO_APIC_irq(irq);
+
+ if (io_apic_level_ack_pending(irq)) {
+ /*
+ * Interrupt in progress. Migrating irq now will change the
+ * vector information in the IO-APIC RTE and that will confuse
+ * the EOI broadcast performed by cpu.
+ * So, delay the irq migration to the next instance.
+ */
+ schedule_delayed_work(&ir_migration_work, 1);
+ goto unmask;
+ }
+
+ /* everthing is clear. we have right of way */
+ migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+ ret = 0;
+ irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+ cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+ unmask_IO_APIC_irq(irq);
+ return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+ int irq;
+
+ for (irq = 0; irq < NR_IRQS; irq++) {
+ struct irq_desc *desc = irq_desc + irq;
+ if (desc->status & IRQ_MOVE_PENDING) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->chip->set_affinity ||
+ !(desc->status & IRQ_MOVE_PENDING)) {
+ desc->status &= ~IRQ_MOVE_PENDING;
+ spin_unlock_irqrestore(&desc->lock, flags);
+ continue;
+ }
+
+ desc->chip->set_affinity(irq,
+ irq_desc[irq].pending_mask);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+ }
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ if (irq_desc[irq].status & IRQ_LEVEL) {
+ irq_desc[irq].status |= IRQ_MOVE_PENDING;
+ irq_desc[irq].pending_mask = mask;
+ migrate_irq_remapped_level(irq);
+ return;
+ }
+
+ migrate_ioapic_irq(irq, mask);
+}
+#endif
+
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -1453,6 +1750,17 @@ static void irq_complete_move(unsigned int irq)
#else
static inline void irq_complete_move(unsigned int irq) {}
#endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+ ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+ ack_x2APIC_irq();
+}
+#endif
static void ack_apic_edge(unsigned int irq)
{
@@ -1527,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
.retrigger = ioapic_retrigger_irq,
};
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+ .name = "IR-IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_x2apic_edge,
+ .eoi = ack_x2apic_level,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ir_ioapic_affinity_irq,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+#endif
+
static inline void init_IO_APIC_traps(void)
{
int irq;
@@ -1712,6 +2035,8 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
+ if (intr_remapping_enabled)
+ panic("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -1738,6 +2063,8 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
+ if (intr_remapping_enabled)
+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1854,8 +2181,6 @@ void __init setup_IO_APIC(void)
setup_IO_APIC_irqs();
init_IO_APIC_traps();
check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
}
struct sysfs_ioapic_data {
@@ -1977,6 +2302,9 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq);
+#ifdef CONFIG_INTR_REMAP
+ free_irte(irq);
+#endif
spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2323,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
tmp = TARGET_CPUS;
err = assign_irq_vector(irq, tmp);
- if (!err) {
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ if (err)
+ return err;
+
+ cpus_and(tmp, cfg->domain, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irte irte;
+ int ir_index;
+ u16 sub_handle;
+
+ ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+ BUG_ON(ir_index == -1);
+
+ memset (&irte, 0, sizeof(irte));
+
+ irte.present = 1;
+ irte.dst_mode = INT_DEST_MODE;
+ irte.trigger_mode = 0; /* edge */
+ irte.dlvry_mode = INT_DELIVERY_MODE;
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ modify_irte(irq, &irte);
msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->data = sub_handle;
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+ MSI_ADDR_IR_SHV |
+ MSI_ADDR_IR_INDEX1(ir_index) |
+ MSI_ADDR_IR_INDEX2(ir_index);
+ } else
+#endif
+ {
+ msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
MSI_ADDR_BASE_LO |
((INT_DEST_MODE == 0) ?
@@ -2049,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
write_msi_msg(irq, &msg);
irq_desc[irq].affinity = mask;
}
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ unsigned int dest;
+ cpumask_t tmp, cleanup_mask;
+ struct irte irte;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ if (get_irte(irq, &irte))
+ return;
+
+ if (assign_irq_vector(irq, mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+ dest = cpu_mask_to_apicid(tmp);
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+
+ /*
+ * atomically update the IRTE with the new destination and vector.
+ */
+ modify_irte(irq, &irte);
+
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ if (cfg->move_in_progress) {
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+ irq_desc[irq].affinity = mask;
+}
+#endif
#endif /* CONFIG_SMP */
/*
@@ -2066,26 +2474,157 @@ static struct irq_chip msi_chip = {
.retrigger = ioapic_retrigger_irq,
};
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+ .name = "IR-PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_x2apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
{
+ struct intel_iommu *iommu;
+ int index;
+
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ printk(KERN_ERR
+ "Unable to map PCI %s to iommu\n", pci_name(dev));
+ return -ENOENT;
+ }
+
+ index = alloc_irte(iommu, irq, nvec);
+ if (index < 0) {
+ printk(KERN_ERR
+ "Unable to allocate %d IRTE for PCI %s\n", nvec,
+ pci_name(dev));
+ return -ENOSPC;
+ }
+ return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+ int ret;
struct msi_msg msg;
+
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ set_irq_msi(irq, desc);
+ write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+ struct irq_desc *desc = irq_desc + irq;
+ /*
+ * irq migration in process context
+ */
+ desc->status |= IRQ_MOVE_PCNTXT;
+ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+ } else
+#endif
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+ return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
int irq, ret;
+
irq = create_irq();
if (irq < 0)
return irq;
- ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+
+ ret = msi_alloc_irte(dev, irq, 1);
+ if (ret < 0)
+ goto error;
+no_ir:
+#endif
+ ret = setup_msi_irq(dev, desc, irq);
if (ret < 0) {
destroy_irq(irq);
return ret;
}
+ return 0;
- set_irq_msi(irq, desc);
- write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+ destroy_irq(irq);
+ return ret;
+#endif
+}
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ int irq, ret, sub_handle;
+ struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+ struct intel_iommu *iommu = 0;
+ int index = 0;
+#endif
+
+ sub_handle = 0;
+ list_for_each_entry(desc, &dev->msi_list, list) {
+ irq = create_irq();
+ if (irq < 0)
+ return irq;
+#ifdef CONFIG_INTR_REMAP
+ if (!intr_remapping_enabled)
+ goto no_ir;
+ if (!sub_handle) {
+ /*
+ * allocate the consecutive block of IRTE's
+ * for 'nvec'
+ */
+ index = msi_alloc_irte(dev, irq, nvec);
+ if (index < 0) {
+ ret = index;
+ goto error;
+ }
+ } else {
+ iommu = map_dev_to_ir(dev);
+ if (!iommu) {
+ ret = -ENOENT;
+ goto error;
+ }
+ /*
+ * setup the mapping between the irq and the IRTE
+ * base index, the sub_handle pointing to the
+ * appropriate interrupt remap table entry.
+ */
+ set_irte_irq(irq, iommu, index, sub_handle);
+ }
+no_ir:
+#endif
+ ret = setup_msi_irq(dev, desc, irq);
+ if (ret < 0)
+ goto error;
+ sub_handle++;
+ }
return 0;
+
+error:
+ destroy_irq(irq);
+ return ret;
}
void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2872,10 @@ void __init setup_ioapic_dest(void)
setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+ else if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
else
set_ioapic_affinity_irq(irq, TARGET_CPUS);
}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 50e5e4a31c8..19191430274 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <asm/syscalls.h>
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
static void set_bitmap(unsigned long *bitmap, unsigned int base,
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 3f7537b669d..f1c688e46f3 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -20,6 +20,8 @@
#ifdef CONFIG_X86_32
#include <mach_apic.h>
+#include <mach_ipi.h>
+
/*
* the following functions deal with sending IPIs between CPUs.
*
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
}
/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
static int convert_apicid_to_cpu(int apic_id)
{
int i;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index d66914287ee..9200a1e2752 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -74,6 +74,15 @@ void __init init_ISA_irqs (void)
}
}
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+static struct irqaction irq2 = {
+ .handler = no_action,
+ .mask = CPU_MASK_NONE,
+ .name = "cascade",
+};
+
/* Overridden in paravirt.c */
void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
@@ -98,6 +107,46 @@ void __init native_init_IRQ(void)
set_intr_gate(vector, interrupt[i]);
}
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+ /*
+ * IRQ0 must be given a fixed assignment and initialized,
+ * because it's used before the IO-APIC is set up.
+ */
+ set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+ /* IPI for invalidation */
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI for single call function */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* self generated IPI for local APIC timer */
+ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+ /* IPI vectors for APIC spurious and error interrupts */
+ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+ alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
+ /* thermal monitor LVT interrupt */
+ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+#endif
+
+ if (!acpi_ioapic)
+ setup_irq(2, &irq2);
+
/* setup after call gates are initialised (usually add in
* the architecture specific gates)
*/
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b68e21f06f4..0ed5f939b90 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -18,6 +18,7 @@
#include <asm/ldt.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
+#include <asm/syscalls.h>
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725c..f98f4e1dba0 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
generic_bigsmp_probe();
#endif
+#ifdef CONFIG_X86_32
setup_apic_routing();
+#endif
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f01..4caff39078e 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
}
}
+static int __init numaq_setup_ioapic_ids(void)
+{
+ /* so can skip it */
+ return 1;
+}
+
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_bus_info = mpc_oem_bus_info,
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
+ .setup_ioapic_ids = numaq_setup_ioapic_ids,
};
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e2f43768723..6b0bb73998d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -374,8 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = {
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
- .apic_write = native_apic_write,
- .apic_read = native_apic_read,
.setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 3b7a1ddcc0b..2c9abc95e02 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,6 +55,8 @@
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71553b664e2..00263c9e650 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
+#include <asm/syscalls.h>
asmlinkage extern void ret_from_fork(void);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e37dccce85d..fc3e8dcd9da 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -69,7 +69,7 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
regno >>= 2;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 362d4e7f2d3..673f12cf6eb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -739,6 +739,8 @@ void __init setup_arch(char **cmdline_p)
num_physpages = max_pfn;
check_efer();
+ if (cpu_has_x2apic)
+ check_x2apic();
/* How many end-of-memory variables you have, grandma! */
/* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 72bbb519d2d..6dd7e2b70a4 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@ struct sigframe {
char __user *pretcode;
int sig;
struct sigcontext sc;
- struct _fpstate fpstate;
+ /*
+ * fpstate is unused. fpstate is moved/allocated after
+ * retcode[] below. This movement allows to have the FP state and the
+ * future state extensions (xsave) stay together.
+ * And at the same time retaining the unused fpstate, prevents changing
+ * the offset of extramask[] in the sigframe and thus prevent any
+ * legacy application accessing/modifying it.
+ */
+ struct _fpstate fpstate_unused;
unsigned long extramask[_NSIG_WORDS-1];
char retcode[8];
+ /* fp state follows here */
};
struct rt_sigframe {
@@ -15,13 +24,14 @@ struct rt_sigframe {
void __user *puc;
struct siginfo info;
struct ucontext uc;
- struct _fpstate fpstate;
char retcode[8];
+ /* fp state follows here */
};
#else
struct rt_sigframe {
char __user *pretcode;
struct ucontext uc;
struct siginfo info;
+ /* fp state follows here */
};
#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 6fb5bcdd893..8d380b699c0 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -26,6 +26,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+#include <asm/syscalls.h>
#include "sigframe.h"
@@ -159,28 +160,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
}
{
- struct _fpstate __user *buf;
+ void __user *buf;
err |= __get_user(buf, &sc->fpstate);
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
- } else {
- struct task_struct *me = current;
-
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
+ err |= restore_i387_xstate(buf);
}
err |= __get_user(*pax, &sc->ax);
return err;
-
-badframe:
- return 1;
}
asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -262,7 +249,7 @@ badframe:
* Set up a signal frame.
*/
static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
int tmp, err = 0;
@@ -289,7 +276,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
- tmp = save_i387(fpstate);
+ tmp = save_i387_xstate(fpstate);
if (tmp < 0)
err = 1;
else
@@ -306,7 +293,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
* Determine which stack to use..
*/
static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+ void **fpstate)
{
unsigned long sp;
@@ -332,6 +320,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
sp = (unsigned long) ka->sa.sa_restorer;
}
+ if (used_math()) {
+ sp = sp - sig_xstate_size;
+ *fpstate = (struct _fpstate *) sp;
+ }
+
sp -= frame_size;
/*
* Align the stack pointer according to the i386 ABI,
@@ -350,8 +343,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
void __user *restorer;
int err = 0;
int usig;
+ void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame));
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
@@ -366,7 +360,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
if (err)
goto give_sigsegv;
- err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+ err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
if (err)
goto give_sigsegv;
@@ -427,8 +421,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
void __user *restorer;
int err = 0;
int usig;
+ void __user *fpstate = NULL;
- frame = get_sigframe(ka, regs, sizeof(*frame));
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
@@ -447,13 +442,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index ca316b5b742..4665b598a37 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -26,6 +26,7 @@
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
+#include <asm/syscalls.h>
#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -54,69 +55,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
}
/*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
- struct task_struct *tsk = current;
- int err = 0;
-
- BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
- sizeof(tsk->thread.xstate->fxsave));
-
- if ((unsigned long)buf % 16)
- printk("save_i387: bad fpstate %p\n", buf);
-
- if (!used_math())
- return 0;
- clear_used_math(); /* trigger finit */
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- err = save_i387_checking((struct i387_fxsave_struct __user *)
- buf);
- if (err)
- return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
- } else {
- if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
- sizeof(struct i387_fxsave_struct)))
- return -1;
- }
- return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
- struct task_struct *tsk = current;
- int err;
-
- if (!used_math()) {
- err = init_fpu(tsk);
- if (err)
- return err;
- }
-
- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
- err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
- if (unlikely(err)) {
- /*
- * Encountered an error while doing the restore from the
- * user buffer, clear the fpu state.
- */
- clear_fpu(tsk);
- clear_used_math();
- }
- return err;
-}
-
-/*
* Do a signal return; undo the signal stack.
*/
static int
@@ -160,25 +98,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
{
struct _fpstate __user * buf;
err |= __get_user(buf, &sc->fpstate);
-
- if (buf) {
- if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
- goto badframe;
- err |= restore_i387(buf);
- } else {
- struct task_struct *me = current;
- if (used_math()) {
- clear_fpu(me);
- clear_used_math();
- }
- }
+ err |= restore_i387_xstate(buf);
}
err |= __get_user(*pax, &sc->ax);
return err;
-
-badframe:
- return 1;
}
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@@ -269,26 +193,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
sp = current->sas_ss_sp + current->sas_ss_size;
}
- return (void __user *)round_down(sp - size, 16);
+ return (void __user *)round_down(sp - size, 64);
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs * regs)
{
struct rt_sigframe __user *frame;
- struct _fpstate __user *fp = NULL;
+ void __user *fp = NULL;
int err = 0;
struct task_struct *me = current;
if (used_math()) {
- fp = get_stack(ka, regs, sizeof(struct _fpstate));
+ fp = get_stack(ka, regs, sig_xstate_size);
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
- if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
- goto give_sigsegv;
-
- if (save_i387(fp) < 0)
+ if (save_i387_xstate(fp) < 0)
err |= -1;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@@ -303,7 +224,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
+ if (cpu_has_xsave)
+ err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp),
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7985c5b3f91..aa804c64b16 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
#else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
@@ -123,13 +123,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
static atomic_t init_deasserted;
-static int boot_cpu_logical_apicid;
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
/* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
#endif
#ifdef CONFIG_X86_32
+static int boot_cpu_logical_apicid;
+
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = BAD_APICID };
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(read_apic_id());
+ phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid)
printk(KERN_CONT
"a previous APIC delivery may have failed\n");
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
timeout = 0;
do {
@@ -583,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
int maxlvt;
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+ apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
/*
* Turn INIT on target chip
*/
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/*
* Send IPI
*/
- apic_write(APIC_ICR,
- APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+ phys_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
pr_debug("Deasserting INIT.\n");
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Send IPI */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
*/
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
+ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+ phys_apicid);
/*
* Give the other CPU some time to accept the IPI.
@@ -1175,10 +1168,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
+#ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id();
+#endif
current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0);
+#ifdef CONFIG_X86_64
+ enable_IR_x2apic();
+ setup_apic_routing();
+#endif
+
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
disable_smp();
@@ -1186,9 +1186,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
}
preempt_disable();
- if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+ if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+ read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */
}
preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044b..7b987852e87 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
#include <linux/init.h>
#include <asm/io.h>
#include <asm/bios_ebda.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/mpparse.h>
static struct rio_table_hdr *rio_table_hdr __initdata;
static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7066cb855a6..1884a8d12bf 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,6 +22,8 @@
#include <linux/uaccess.h>
#include <linux/unistd.h>
+#include <asm/syscalls.h>
+
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 3b360ef3381..c9288c883e2 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -16,6 +16,7 @@
#include <asm/uaccess.h>
#include <asm/ia32.h>
+#include <asm/syscalls.h>
asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long off)
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c1748..3d1be4f0fac 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
#define __NO_STUBS
#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
#include <asm/unistd_64.h>
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
typedef void (*sys_call_ptr_t)(void);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index ffe3c664afc..bbecf8b6bf9 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -36,6 +36,7 @@
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/time.h>
+#include <asm/timer.h>
#include "do_timer.h"
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index ab6bf375a30..6bb7b8579e7 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -10,6 +10,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/proto.h>
+#include <asm/syscalls.h>
#include "tls.h"
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a..da5a5964fcc 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors);
- init_thread_xstate();
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 7ffc8cff6cc..b42068fb7b7 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1133,7 +1133,7 @@ asmlinkage void math_state_restore(void)
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
- if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+ if (unlikely(restore_fpu_checking(me))) {
stts();
force_sig(SIGSEGV, me);
return;
@@ -1172,10 +1172,6 @@ void __init trap_init(void)
set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
#endif
/*
- * initialize the per thread extended state:
- */
- init_thread_xstate();
- /*
* Should be a barrier for any external CPU state:
*/
cpu_init();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8e786b0d665..346cae5ac42 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -122,80 +122,217 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet)
return ULLONG_MAX;
}
-/**
- * native_calibrate_tsc - calibrate the tsc on boot
+/*
+ * Try to calibrate the TSC against the Programmable
+ * Interrupt Timer and return the frequency of the TSC
+ * in kHz.
+ *
+ * Return ULONG_MAX on failure to calibrate.
*/
-unsigned long native_calibrate_tsc(void)
+static unsigned long pit_calibrate_tsc(void)
{
- unsigned long flags;
- u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
- int hpet = is_hpet_enabled();
- unsigned int tsc_khz_val = 0;
-
- local_irq_save(flags);
-
- tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+ u64 tsc, t1, t2, delta;
+ unsigned long tscmin, tscmax;
+ int pitcnt;
+ /* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+ /*
+ * Setup CTC channel 2* for mode 0, (interrupt on terminal
+ * count mode), binary count. Set the latch register to 50ms
+ * (LSB then MSB) to begin countdown.
+ */
outb(0xb0, 0x43);
outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
- tr1 = get_cycles();
- while ((inb(0x61) & 0x20) == 0);
- tr2 = get_cycles();
- tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+ tsc = t1 = t2 = get_cycles();
- local_irq_restore(flags);
+ pitcnt = 0;
+ tscmax = 0;
+ tscmin = ULONG_MAX;
+ while ((inb(0x61) & 0x20) == 0) {
+ t2 = get_cycles();
+ delta = t2 - tsc;
+ tsc = t2;
+ if ((unsigned long) delta < tscmin)
+ tscmin = (unsigned int) delta;
+ if ((unsigned long) delta > tscmax)
+ tscmax = (unsigned int) delta;
+ pitcnt++;
+ }
/*
- * Preset the result with the raw and inaccurate PIT
- * calibration value
+ * Sanity checks:
+ *
+ * If we were not able to read the PIT more than 5000
+ * times, then we have been hit by a massive SMI
+ *
+ * If the maximum is 10 times larger than the minimum,
+ * then we got hit by an SMI as well.
*/
- delta = (tr2 - tr1);
+ if (pitcnt < 5000 || tscmax > 10 * tscmin)
+ return ULONG_MAX;
+
+ /* Calculate the PIT value */
+ delta = t2 - t1;
do_div(delta, 50);
- tsc_khz_val = delta;
+ return delta;
+}
+
+
+/**
+ * native_calibrate_tsc - calibrate the tsc on boot
+ */
+unsigned long native_calibrate_tsc(void)
+{
+ u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2;
+ unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
+ unsigned long flags;
+ int hpet = is_hpet_enabled(), i;
- /* hpet or pmtimer available ? */
+ /*
+ * Run 5 calibration loops to get the lowest frequency value
+ * (the best estimate). We use two different calibration modes
+ * here:
+ *
+ * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
+ * load a timeout of 50ms. We read the time right after we
+ * started the timer and wait until the PIT count down reaches
+ * zero. In each wait loop iteration we read the TSC and check
+ * the delta to the previous read. We keep track of the min
+ * and max values of that delta. The delta is mostly defined
+ * by the IO time of the PIT access, so we can detect when a
+ * SMI/SMM disturbance happend between the two reads. If the
+ * maximum time is significantly larger than the minimum time,
+ * then we discard the result and have another try.
+ *
+ * 2) Reference counter. If available we use the HPET or the
+ * PMTIMER as a reference to check the sanity of that value.
+ * We use separate TSC readouts and check inside of the
+ * reference read for a SMI/SMM disturbance. We dicard
+ * disturbed values here as well. We do that around the PIT
+ * calibration delay loop as we have to wait for a certain
+ * amount of time anyway.
+ */
+ for (i = 0; i < 5; i++) {
+ unsigned long tsc_pit_khz;
+
+ /*
+ * Read the start value and the reference count of
+ * hpet/pmtimer when available. Then do the PIT
+ * calibration, which will take at least 50ms, and
+ * read the end value.
+ */
+ local_irq_save(flags);
+ tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
+ tsc_pit_khz = pit_calibrate_tsc();
+ tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
+ local_irq_restore(flags);
+
+ /* Pick the lowest PIT TSC calibration so far */
+ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
+
+ /* hpet or pmtimer available ? */
+ if (!hpet && !pm1 && !pm2)
+ continue;
+
+ /* Check, whether the sampling was disturbed by an SMI */
+ if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
+ continue;
+
+ tsc2 = (tsc2 - tsc1) * 1000000LL;
+
+ if (hpet) {
+ if (hpet2 < hpet1)
+ hpet2 += 0x100000000ULL;
+ hpet2 -= hpet1;
+ tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
+ do_div(tsc1, 1000000);
+ } else {
+ if (pm2 < pm1)
+ pm2 += (u64)ACPI_PM_OVRRUN;
+ pm2 -= pm1;
+ tsc1 = pm2 * 1000000000LL;
+ do_div(tsc1, PMTMR_TICKS_PER_SEC);
+ }
+
+ do_div(tsc2, tsc1);
+ tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
+ }
+
+ /*
+ * Now check the results.
+ */
+ if (tsc_pit_min == ULONG_MAX) {
+ /* PIT gave no useful value */
+ printk(KERN_WARNING "TSC: PIT calibration failed due to "
+ "SMI disturbance.\n");
+
+ /* We don't have an alternative source, disable TSC */
+ if (!hpet && !pm1 && !pm2) {
+ printk("TSC: No reference (HPET/PMTIMER) available\n");
+ return 0;
+ }
+
+ /* The alternative source failed as well, disable TSC */
+ if (tsc_ref_min == ULONG_MAX) {
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
+ "failed due to SMI disturbance.\n");
+ return 0;
+ }
+
+ /* Use the alternative source */
+ printk(KERN_INFO "TSC: using %s reference calibration\n",
+ hpet ? "HPET" : "PMTIMER");
+
+ return tsc_ref_min;
+ }
+
+ /* We don't have an alternative source, use the PIT calibration value */
if (!hpet && !pm1 && !pm2) {
- printk(KERN_INFO "TSC calibrated against PIT\n");
- goto out;
+ printk(KERN_INFO "TSC: Using PIT calibration value\n");
+ return tsc_pit_min;
}
- /* Check, whether the sampling was disturbed by an SMI */
- if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
- printk(KERN_WARNING "TSC calibration disturbed by SMI, "
- "using PIT calibration result\n");
- goto out;
+ /* The alternative source failed, use the PIT calibration value */
+ if (tsc_ref_min == ULONG_MAX) {
+ printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
+ "to SMI disturbance. Using PIT calibration\n");
+ return tsc_pit_min;
}
- tsc2 = (tsc2 - tsc1) * 1000000LL;
-
- if (hpet) {
- printk(KERN_INFO "TSC calibrated against HPET\n");
- if (hpet2 < hpet1)
- hpet2 += 0x100000000ULL;
- hpet2 -= hpet1;
- tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
- do_div(tsc1, 1000000);
- } else {
- printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
- if (pm2 < pm1)
- pm2 += (u64)ACPI_PM_OVRRUN;
- pm2 -= pm1;
- tsc1 = pm2 * 1000000000LL;
- do_div(tsc1, PMTMR_TICKS_PER_SEC);
+ /* Check the reference deviation */
+ delta = ((u64) tsc_pit_min) * 100;
+ do_div(delta, tsc_ref_min);
+
+ /*
+ * If both calibration results are inside a 5% window, the we
+ * use the lower frequency of those as it is probably the
+ * closest estimate.
+ */
+ if (delta >= 95 && delta <= 105) {
+ printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
+ hpet ? "HPET" : "PMTIMER");
+ printk(KERN_INFO "TSC: using %s calibration value\n",
+ tsc_pit_min <= tsc_ref_min ? "PIT" :
+ hpet ? "HPET" : "PMTIMER");
+ return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
}
- do_div(tsc2, tsc1);
- tsc_khz_val = tsc2;
+ printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
+ hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
-out:
- return tsc_khz_val;
+ /*
+ * The calibration values differ too much. In doubt, we use
+ * the PIT value as we know that there are PMTIMERs around
+ * running at double speed.
+ */
+ printk(KERN_INFO "TSC: Using PIT calibration value\n");
+ return tsc_pit_min;
}
-
#ifdef CONFIG_X86_32
/* Only called from the Powernow K7 cpu freq driver */
int recalibrate_cpu_khz(void)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 38f566fa27d..4eeb5cf9720 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -46,6 +46,7 @@
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
+#include <asm/syscalls.h>
/*
* Known problems:
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db5..61531d5c950 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
#endif
#ifdef CONFIG_X86_LOCAL_APIC
- para_fill(pv_apic_ops.apic_read, APICRead);
- para_fill(pv_apic_ops.apic_write, APICWrite);
+ para_fill(apic_ops->read, APICRead);
+ para_fill(apic_ops->write, APICWrite);
#endif
/*
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 00000000000..07713d64deb
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/i387.h>
+#ifdef CONFIG_IA32_EMULATION
+#include <asm/sigcontext32.h>
+#endif
+#include <asm/xcr.h>
+
+/*
+ * Supported feature mask by the CPU and the kernel.
+ */
+u64 pcntxt_mask;
+
+struct _fpx_sw_bytes fx_sw_reserved;
+#ifdef CONFIG_IA32_EMULATION
+struct _fpx_sw_bytes fx_sw_reserved_ia32;
+#endif
+
+/*
+ * Check for the presence of extended state information in the
+ * user fpstate pointer in the sigcontext.
+ */
+int check_for_xstate(struct i387_fxsave_struct __user *buf,
+ void __user *fpstate,
+ struct _fpx_sw_bytes *fx_sw_user)
+{
+ int min_xstate_size = sizeof(struct i387_fxsave_struct) +
+ sizeof(struct xsave_hdr_struct);
+ unsigned int magic2;
+ int err;
+
+ err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
+ sizeof(struct _fpx_sw_bytes));
+
+ if (err)
+ return err;
+
+ /*
+ * First Magic check failed.
+ */
+ if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
+ return -1;
+
+ /*
+ * Check for error scenarios.
+ */
+ if (fx_sw_user->xstate_size < min_xstate_size ||
+ fx_sw_user->xstate_size > xstate_size ||
+ fx_sw_user->xstate_size > fx_sw_user->extended_size)
+ return -1;
+
+ err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
+ fx_sw_user->extended_size -
+ FP_XSTATE_MAGIC2_SIZE));
+ /*
+ * Check for the presence of second magic word at the end of memory
+ * layout. This detects the case where the user just copied the legacy
+ * fpstate layout with out copying the extended state information
+ * in the memory layout.
+ */
+ if (err || magic2 != FP_XSTATE_MAGIC2)
+ return -1;
+
+ return 0;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Signal frame handlers.
+ */
+
+int save_i387_xstate(void __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
+ return -EACCES;
+
+ BUG_ON(sig_xstate_size < xstate_size);
+
+ if ((unsigned long)buf % 64)
+ printk("save_i387_xstate: bad fpstate %p\n", buf);
+
+ if (!used_math())
+ return 0;
+ clear_used_math(); /* trigger finit */
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ /*
+ * Start with clearing the user buffer. This will present a
+ * clean context for the bytes not touched by the fxsave/xsave.
+ */
+ __clear_user(buf, sig_xstate_size);
+
+ if (task_thread_info(tsk)->status & TS_XSAVE)
+ err = xsave_user(buf);
+ else
+ err = fxsave_user(buf);
+
+ if (err)
+ return err;
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ } else {
+ if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
+ xstate_size))
+ return -1;
+ }
+
+ if (task_thread_info(tsk)->status & TS_XSAVE) {
+ struct _fpstate __user *fx = buf;
+
+ err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
+ sizeof(struct _fpx_sw_bytes));
+
+ err |= __put_user(FP_XSTATE_MAGIC2,
+ (__u32 __user *) (buf + sig_xstate_size
+ - FP_XSTATE_MAGIC2_SIZE));
+ }
+
+ return 1;
+}
+
+/*
+ * Restore the extended state if present. Otherwise, restore the FP/SSE
+ * state.
+ */
+int restore_user_xstate(void __user *buf)
+{
+ struct _fpx_sw_bytes fx_sw_user;
+ u64 mask;
+ int err;
+
+ if (((unsigned long)buf % 64) ||
+ check_for_xstate(buf, buf, &fx_sw_user))
+ goto fx_only;
+
+ mask = fx_sw_user.xstate_bv;
+
+ /*
+ * restore the state passed by the user.
+ */
+ err = xrestore_user(buf, mask);
+ if (err)
+ return err;
+
+ /*
+ * init the state skipped by the user.
+ */
+ mask = pcntxt_mask & ~mask;
+
+ xrstor_state(init_xstate_buf, mask);
+
+ return 0;
+
+fx_only:
+ /*
+ * couldn't find the extended state information in the
+ * memory layout. Restore just the FP/SSE and init all
+ * the other extended state.
+ */
+ xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
+ return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+int restore_i387_xstate(void __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ if (!buf) {
+ if (used_math())
+ goto clear;
+ return 0;
+ } else
+ if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
+ return -EACCES;
+
+ if (!used_math()) {
+ err = init_fpu(tsk);
+ if (err)
+ return err;
+ }
+
+ if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+ clts();
+ task_thread_info(current)->status |= TS_USEDFPU;
+ }
+ if (task_thread_info(tsk)->status & TS_XSAVE)
+ err = restore_user_xstate(buf);
+ else
+ err = fxrstor_checking((__force struct i387_fxsave_struct *)
+ buf);
+ if (unlikely(err)) {
+ /*
+ * Encountered an error while doing the restore from the
+ * user buffer, clear the fpu state.
+ */
+clear:
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+ return err;
+}
+#endif
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed by the fpstate pointer in the sigcontext.
+ * This will be saved when ever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
+ */
+void prepare_fx_sw_frame(void)
+{
+ int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
+ FP_XSTATE_MAGIC2_SIZE;
+
+ sig_xstate_size = sizeof(struct _fpstate) + size_extended;
+
+#ifdef CONFIG_IA32_EMULATION
+ sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
+#endif
+
+ memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
+
+ fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
+ fx_sw_reserved.extended_size = sig_xstate_size;
+ fx_sw_reserved.xstate_bv = pcntxt_mask;
+ fx_sw_reserved.xstate_size = xstate_size;
+#ifdef CONFIG_IA32_EMULATION
+ memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
+ sizeof(struct _fpx_sw_bytes));
+ fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
+#endif
+}
+
+/*
+ * Represents init state for the supported extended state.
+ */
+struct xsave_struct *init_xstate_buf;
+
+#ifdef CONFIG_X86_64
+unsigned int sig_xstate_size = sizeof(struct _fpstate);
+#endif
+
+/*
+ * Enable the extended processor state save/restore feature
+ */
+void __cpuinit xsave_init(void)
+{
+ if (!cpu_has_xsave)
+ return;
+
+ set_in_cr4(X86_CR4_OSXSAVE);
+
+ /*
+ * Enable all the features that the HW is capable of
+ * and the Linux kernel is aware of.
+ */
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+}
+
+/*
+ * setup the xstate image representing the init state
+ */
+void setup_xstate_init(void)
+{
+ init_xstate_buf = alloc_bootmem(xstate_size);
+ init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+}
+
+/*
+ * Enable and initialize the xsave feature.
+ */
+void __init xsave_cntxt_init(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ pcntxt_mask = eax + ((u64)edx << 32);
+
+ if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+ printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+ pcntxt_mask);
+ BUG();
+ }
+
+ /*
+ * for now OS knows only about FP/SSE
+ */
+ pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
+ xsave_init();
+
+ /*
+ * Recompute the context size for enabled features
+ */
+ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ xstate_size = ebx;
+
+ prepare_fx_sw_frame();
+
+ setup_xstate_init();
+
+ printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
+ "cntxt size 0x%x\n",
+ pcntxt_mask, xstate_size);
+}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f72ac1fa35f..4a814bff21f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -345,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
shadow_addr = __pa(shadow_page->spt);
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
- *shadow_ent = shadow_pte;
+ set_shadow_pte(shadow_ent, shadow_pte);
}
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d9249a882aa..65f0b8a47be 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
#include <linux/lguest_launcher.h>
#include <linux/virtio_console.h>
#include <linux/pm.h>
+#include <asm/apic.h>
#include <asm/lguest.h>
#include <asm/paravirt.h>
#include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
* code qualifies for Advanced. It will also never interrupt anything. It
* does, however, allow us to get through the Linux boot code. */
#ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, u32 v)
+static void lguest_apic_write(u32 reg, u32 v)
{
}
-static u32 lguest_apic_read(unsigned long reg)
+static u32 lguest_apic_read(u32 reg)
{
return 0;
}
+
+static u64 lguest_apic_icr_read(void)
+{
+ return 0;
+}
+
+static void lguest_apic_icr_write(u32 low, u32 id)
+{
+ /* Warn to see if there's any stray references */
+ WARN_ON(1);
+}
+
+static void lguest_apic_wait_icr_idle(void)
+{
+ return;
+}
+
+static u32 lguest_apic_safe_wait_icr_idle(void)
+{
+ return 0;
+}
+
+static struct apic_ops lguest_basic_apic_ops = {
+ .read = lguest_apic_read,
+ .write = lguest_apic_write,
+ .icr_read = lguest_apic_icr_read,
+ .icr_write = lguest_apic_icr_write,
+ .wait_icr_idle = lguest_apic_wait_icr_idle,
+ .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
+};
#endif
/* STOP! Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
#ifdef CONFIG_X86_LOCAL_APIC
/* apic read/write intercepts */
- pv_apic_ops.apic_write = lguest_apic_write;
- pv_apic_ops.apic_read = lguest_apic_read;
+ apic_ops = &lguest_basic_apic_ops;
#endif
/* time operations */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index aa3fa411942..55e11aa6d66 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y)
lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
obj-y += io_64.o iomap_copy_64.o
-
- CFLAGS_csum-partial_64.o := -funroll-loops
-
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
lib-y += thunk_64.o clear_page_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3d317836be9..37b9ae4d44c 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,13 +10,15 @@
#include <asm/e820.h>
#include <asm/setup.h>
+#include <mach_ipi.h>
+
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
#else
#define DEFAULT_SEND_IPI (0)
#endif
-int no_broadcast=DEFAULT_SEND_IPI;
+int no_broadcast = DEFAULT_SEND_IPI;
/**
* pre_intr_init_hook - initialisation prior to setting up interrupt vectors
@@ -36,15 +38,6 @@ void __init pre_intr_init_hook(void)
init_ISA_irqs();
}
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-static struct irqaction irq2 = {
- .handler = no_action,
- .mask = CPU_MASK_NONE,
- .name = "cascade",
-};
-
/**
* intr_init_hook - post gate setup interrupt initialisation
*
@@ -60,12 +53,6 @@ void __init intr_init_hook(void)
if (x86_quirks->arch_intr_init())
return;
}
-#ifdef CONFIG_X86_LOCAL_APIC
- apic_intr_init();
-#endif
-
- if (!acpi_ioapic)
- setup_irq(2, &irq2);
}
/**
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile
deleted file mode 100644
index 3ef8b43b62f..00000000000
--- a/arch/x86/mach-es7000/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-obj-$(CONFIG_X86_ES7000) := es7000plat.o
diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/mach-es7000/es7000.h
deleted file mode 100644
index c8d5aa132fa..00000000000
--- a/arch/x86/mach-es7000/es7000.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Written by: Garry Forsgren, Unisys Corporation
- * Natalie Protasevich, Unisys Corporation
- * This file contains the code to configure and interface
- * with Unisys ES7000 series hardware system manager.
- *
- * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Unisys Corporation, Township Line & Union Meeting
- * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
- *
- * http://www.unisys.com
- */
-
-/*
- * ES7000 chipsets
- */
-
-#define NON_UNISYS 0
-#define ES7000_CLASSIC 1
-#define ES7000_ZORRO 2
-
-
-#define MIP_REG 1
-#define MIP_PSAI_REG 4
-
-#define MIP_BUSY 1
-#define MIP_SPIN 0xf0000
-#define MIP_VALID 0x0100000000000000ULL
-#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
-
-#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
-
-struct mip_reg_info {
- unsigned long long mip_info;
- unsigned long long delivery_info;
- unsigned long long host_reg;
- unsigned long long mip_reg;
-};
-
-struct part_info {
- unsigned char type;
- unsigned char length;
- unsigned char part_id;
- unsigned char apic_mode;
- unsigned long snum;
- char ptype[16];
- char sname[64];
- char pname[64];
-};
-
-struct psai {
- unsigned long long entry_type;
- unsigned long long addr;
- unsigned long long bep_addr;
-};
-
-struct es7000_mem_info {
- unsigned char type;
- unsigned char length;
- unsigned char resv[6];
- unsigned long long start;
- unsigned long long size;
-};
-
-struct es7000_oem_table {
- unsigned long long hdr;
- struct mip_reg_info mip;
- struct part_info pif;
- struct es7000_mem_info shm;
- struct psai psai;
-};
-
-#ifdef CONFIG_ACPI
-
-struct oem_table {
- struct acpi_table_header Header;
- u32 OEMTableAddr;
- u32 OEMTableSize;
-};
-
-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
-#endif
-
-struct mip_reg {
- unsigned long long off_0;
- unsigned long long off_8;
- unsigned long long off_10;
- unsigned long long off_18;
- unsigned long long off_20;
- unsigned long long off_28;
- unsigned long long off_30;
- unsigned long long off_38;
-};
-
-#define MIP_SW_APIC 0x1020b
-#define MIP_FUNC(VALUE) (VALUE & 0xff)
-
-extern int parse_unisys_oem (char *oemptr);
-extern void setup_unisys(void);
-extern int es7000_start_cpu(int cpu, unsigned long eip);
-extern void es7000_sw_apic(void);
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 0dbd7803a1d..6730f4e7c74 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -9,4 +9,3 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT) += summit.o
obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
obj-$(CONFIG_X86_ES7000) += es7000.o
-obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d77171455..df37fc9d6a2 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,18 +5,17 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <linux/kernel.h>
-#include <linux/smp.h>
#include <linux/init.h>
#include <linux/dmi.h>
-#include <asm/mach-bigsmp/mach_apic.h>
-#include <asm/mach-bigsmp/mach_apicdef.h>
-#include <asm/mach-bigsmp/mach_ipi.h>
+#include <asm/bigsmp/apicdef.h>
+#include <linux/smp.h>
+#include <asm/bigsmp/apic.h>
+#include <asm/bigsmp/ipi.h>
#include <asm/mach-default/mach_mpparse.h>
static int dmi_bigsmp; /* can be set by dmi scanners */
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c..520cca0ee04 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,20 +4,19 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/smp.h>
#include <linux/init.h>
-#include <asm/mach-es7000/mach_apicdef.h>
-#include <asm/mach-es7000/mach_apic.h>
-#include <asm/mach-es7000/mach_ipi.h>
-#include <asm/mach-es7000/mach_mpparse.h>
-#include <asm/mach-es7000/mach_wakecpu.h>
+#include <asm/es7000/apicdef.h>
+#include <linux/smp.h>
+#include <asm/es7000/apic.h>
+#include <asm/es7000/ipi.h>
+#include <asm/es7000/mpparse.h>
+#include <asm/es7000/wakecpu.h>
static int probe_es7000(void)
{
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c..8cf58394975 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <linux/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
@@ -12,11 +11,12 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <asm/mach-numaq/mach_apic.h>
-#include <asm/mach-numaq/mach_apicdef.h>
-#include <asm/mach-numaq/mach_ipi.h>
-#include <asm/mach-numaq/mach_mpparse.h>
-#include <asm/mach-numaq/mach_wakecpu.h>
+#include <asm/numaq/apicdef.h>
+#include <linux/smp.h>
+#include <asm/numaq/apic.h>
+#include <asm/numaq/ipi.h>
+#include <asm/numaq/mpparse.h>
+#include <asm/numaq/wakecpu.h>
#include <asm/numaq.h>
static int mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1..6ad6b67a723 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,19 +4,18 @@
#define APIC_DEFINITION 1
#include <linux/threads.h>
#include <linux/cpumask.h>
-#include <asm/smp.h>
#include <asm/mpspec.h>
#include <asm/genapic.h>
#include <asm/fixmap.h>
#include <asm/apicdef.h>
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/smp.h>
#include <linux/init.h>
-#include <asm/mach-summit/mach_apic.h>
-#include <asm/mach-summit/mach_apicdef.h>
-#include <asm/mach-summit/mach_ipi.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/apicdef.h>
+#include <linux/smp.h>
+#include <asm/summit/apic.h>
+#include <asm/summit/ipi.h>
+#include <asm/summit/mpparse.h>
static int probe_summit(void)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b4..8f92cac4e6d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>
+#include <asm/traps.h>
/*
* Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
return 0;
}
-void do_invalid_op(struct pt_regs *, unsigned long);
-
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d37f29376b0..4974e97dedf 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -47,6 +47,7 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
+#include <asm/smp.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a29ae..cac6da54203 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -421,7 +421,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
return;
}
-int __initdata early_ioremap_debug;
+static int __initdata early_ioremap_debug;
static int __init early_ioremap_debug_setup(char *str)
{
@@ -547,7 +547,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
}
-int __initdata early_ioremap_nested;
+static int __initdata early_ioremap_nested;
static int __init check_early_ioremap_leak(void)
{
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 19af06927fb..1d88d2b3977 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -250,10 +250,5 @@ int __init pci_acpi_init(void)
acpi_pci_irq_enable(dev);
}
-#ifdef CONFIG_X86_IO_APIC
- if (acpi_ioapic)
- print_IO_APIC();
-#endif
-
return 0;
}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index d765da91384..844df0cbbd3 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -31,11 +31,9 @@
#include <linux/ioport.h>
#include <linux/errno.h>
#include <linux/bootmem.h>
-#include <linux/acpi.h>
#include <asm/pat.h>
-#include <asm/hpet.h>
-#include <asm/io_apic.h>
+#include <asm/e820.h>
#include "pci.h"
@@ -80,77 +78,6 @@ pcibios_align_resource(void *data, struct resource *res,
}
EXPORT_SYMBOL(pcibios_align_resource);
-static int check_res_with_valid(struct pci_dev *dev, struct resource *res)
-{
- unsigned long base;
- unsigned long size;
- int i;
-
- base = res->start;
- size = (res->start == 0 && res->end == res->start) ? 0 :
- (res->end - res->start + 1);
-
- if (!base || !size)
- return 0;
-
-#ifdef CONFIG_HPET_TIMER
- /* for hpet */
- if (base == hpet_address && (res->flags & IORESOURCE_MEM)) {
- dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n",
- base, base + size - 1);
- return 1;
- }
-#endif
-
-#ifdef CONFIG_X86_IO_APIC
- for (i = 0; i < nr_ioapics; i++) {
- unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr;
-
- if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) {
- dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n",
- base, base + size - 1);
- return 1;
- }
- }
-#endif
-
-#ifdef CONFIG_PCI_MMCONFIG
- for (i = 0; i < pci_mmcfg_config_num; i++) {
- unsigned long addr;
-
- addr = pci_mmcfg_config[i].address;
- if (base == addr && (res->flags & IORESOURCE_MEM)) {
- dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n",
- base, base + size - 1);
- return 1;
- }
- }
-#endif
-
- return 0;
-}
-
-static int check_platform(struct pci_dev *dev, struct resource *res)
-{
- struct resource *root = NULL;
-
- /*
- * forcibly insert it into the
- * resource tree
- */
- if (res->flags & IORESOURCE_MEM)
- root = &iomem_resource;
- else if (res->flags & IORESOURCE_IO)
- root = &ioport_resource;
-
- if (root && check_res_with_valid(dev, res)) {
- insert_resource(root, res);
-
- return 1;
- }
-
- return 0;
-}
/*
* Handle resources of PCI devices. If the world were perfect, we could
* just allocate all the resource regions and do nothing more. It isn't.
@@ -202,10 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
pr = pci_find_parent_resource(dev, r);
if (!r->start || !pr ||
request_resource(pr, r) < 0) {
- if (check_platform(dev, r))
- continue;
- dev_err(&dev->dev, "BAR %d: can't "
- "allocate resource\n", idx);
+ dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
/*
* Something is wrong with the region.
* Invalidate the resource to prevent
@@ -240,17 +164,13 @@ static void __init pcibios_allocate_resources(int pass)
else
disabled = !(command & PCI_COMMAND_MEMORY);
if (pass == disabled) {
- dev_dbg(&dev->dev, "resource %#08llx-%#08llx "
- "(f=%lx, d=%d, p=%d)\n",
+ dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n",
(unsigned long long) r->start,
(unsigned long long) r->end,
r->flags, disabled, pass);
pr = pci_find_parent_resource(dev, r);
if (!pr || request_resource(pr, r) < 0) {
- if (check_platform(dev, r))
- continue;
- dev_err(&dev->dev, "BAR %d: can't "
- "allocate resource\n", idx);
+ dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
/* We'll assign a new address later */
r->end -= r->start;
r->start = 0;
@@ -308,6 +228,8 @@ void __init pcibios_resource_survey(void)
pcibios_allocate_bus_resources(&pci_root_buses);
pcibios_allocate_resources(0);
pcibios_allocate_resources(1);
+
+ e820_reserve_resources_late();
}
/**
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index d9635764ce3..654a2234f8f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -209,7 +209,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
return name != NULL;
}
-static void __init pci_mmcfg_insert_resources(unsigned long resource_flags)
+static void __init pci_mmcfg_insert_resources(void)
{
#define PCI_MMCFG_RESOURCE_NAME_LEN 19
int i;
@@ -233,7 +233,7 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags)
cfg->pci_segment);
res->start = cfg->address;
res->end = res->start + (num_buses << 20) - 1;
- res->flags = IORESOURCE_MEM | resource_flags;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
names += PCI_MMCFG_RESOURCE_NAME_LEN;
}
@@ -434,11 +434,9 @@ static void __init __pci_mmcfg_init(int early)
(pci_mmcfg_config[0].address == 0))
return;
- if (pci_mmcfg_arch_init()) {
- if (known_bridge)
- pci_mmcfg_insert_resources(IORESOURCE_BUSY);
+ if (pci_mmcfg_arch_init())
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
- } else {
+ else {
/*
* Signal not to attempt to insert mmcfg resources because
* the architecture mmcfg setup could not initialize.
@@ -475,7 +473,7 @@ static int __init pci_mmcfg_late_insert_resources(void)
* marked so it won't cause request errors when __request_region is
* called.
*/
- pci_mmcfg_insert_resources(0);
+ pci_mmcfg_insert_resources();
return 0;
}
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index d3e083dea72..274d06082f4 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -11,6 +11,7 @@
#include <linux/suspend.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/xcr.h>
static struct saved_context saved_context;
@@ -126,6 +127,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
if (boot_cpu_has(X86_FEATURE_SEP))
enable_sep_cpu();
+ /*
+ * restore XCR0 for xsave capable cpu's.
+ */
+ if (cpu_has_xsave)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
fix_processor_context();
do_fpu_end();
mtrr_ap_init();
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 66bdfb591fd..e3b6cf70d62 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -14,6 +14,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mtrr.h>
+#include <asm/xcr.h>
static void fix_processor_context(void);
@@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
wrmsrl(MSR_GS_BASE, ctxt->gs_base);
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ /*
+ * restore XCR0 for xsave capable cpu's.
+ */
+ if (cpu_has_xsave)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
fix_processor_context();
do_fpu_end();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08..8d28925ebed 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
#include <xen/hvc-console.h>
#include <asm/paravirt.h>
+#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static u32 xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(u32 reg)
{
return 0;
}
-static void xen_apic_write(unsigned long reg, u32 val)
+static void xen_apic_write(u32 reg, u32 val)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
}
+
+static u64 xen_apic_icr_read(void)
+{
+ return 0;
+}
+
+static void xen_apic_icr_write(u32 low, u32 id)
+{
+ /* Warn to see if there's any stray references */
+ WARN_ON(1);
+}
+
+static void xen_apic_wait_icr_idle(void)
+{
+ return;
+}
+
+static u32 xen_safe_apic_wait_icr_idle(void)
+{
+ return 0;
+}
+
+static struct apic_ops xen_basic_apic_ops = {
+ .read = xen_apic_read,
+ .write = xen_apic_write,
+ .icr_read = xen_apic_icr_read,
+ .icr_write = xen_apic_icr_write,
+ .wait_icr_idle = xen_apic_wait_icr_idle,
+ .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
+};
+
#endif
static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC
- .apic_write = xen_apic_write,
- .apic_read = xen_apic_read,
.setup_boot_clock = paravirt_nop,
.setup_secondary_clock = paravirt_nop,
.startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * set up the basic apic ops.
+ */
+ apic_ops = &xen_basic_apic_ops;
+#endif
+
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;