aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig29
-rw-r--r--arch/x86/Kconfig.cpu24
-rw-r--r--arch/x86/boot/compressed/.gitignore2
-rw-r--r--arch/x86/boot/tty.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c5
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h24
-rw-r--r--arch/x86/include/asm/bitops.h10
-rw-r--r--arch/x86/include/asm/byteorder.h74
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h10
-rw-r--r--arch/x86/include/asm/ds.h6
-rw-r--r--arch/x86/include/asm/dwarf2.h97
-rw-r--r--arch/x86/include/asm/es7000/wakecpu.h9
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/fixmap_32.h4
-rw-r--r--arch/x86/include/asm/ftrace.h4
-rw-r--r--arch/x86/include/asm/highmem.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h4
-rw-r--r--arch/x86/include/asm/io.h6
-rw-r--r--arch/x86/include/asm/iomap.h30
-rw-r--r--arch/x86/include/asm/iommu.h2
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/irq_regs_32.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h20
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/linkage.h60
-rw-r--r--arch/x86/include/asm/mach-default/mach_wakecpu.h9
-rw-r--r--arch/x86/include/asm/mmzone_32.h4
-rw-r--r--arch/x86/include/asm/msr.h2
-rw-r--r--arch/x86/include/asm/pci_64.h14
-rw-r--r--arch/x86/include/asm/pgtable-3level.h4
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/smp.h6
-rw-r--r--arch/x86/include/asm/topology.h9
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h1
-rw-r--r--arch/x86/include/asm/voyager.h1
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/acpi/boot.c1
-rw-r--r--arch/x86/kernel/amd_iommu.c45
-rw-r--r--arch/x86/kernel/amd_iommu_init.c6
-rw-r--r--arch/x86/kernel/apic.c2
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c2
-rw-r--r--arch/x86/kernel/cpu/common.c7
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h17
-rw-r--r--arch/x86/kernel/ds.c81
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/entry_32.S481
-rw-r--r--arch/x86/kernel/entry_64.S1364
-rw-r--r--arch/x86/kernel/es7000_32.c9
-rw-r--r--arch/x86/kernel/ftrace.c50
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c7
-rw-r--r--arch/x86/kernel/hpet.c4
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c2
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/io_apic.c60
-rw-r--r--arch/x86/kernel/irq_64.c24
-rw-r--r--arch/x86/kernel/irqinit_32.c2
-rw-r--r--arch/x86/kernel/irqinit_64.c66
-rw-r--r--arch/x86/kernel/k8.c1
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c5
-rw-r--r--arch/x86/kernel/microcode_amd.c2
-rw-r--r--arch/x86/kernel/microcode_core.c4
-rw-r--r--arch/x86/kernel/mpparse.c3
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-dma.c16
-rw-r--r--arch/x86/kernel/pci-gart_64.c4
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c14
-rw-r--r--arch/x86/kernel/reboot.c15
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/time_64.c2
-rw-r--r--arch/x86/kernel/tlb_32.c6
-rw-r--r--arch/x86/kernel/tlb_64.c5
-rw-r--r--arch/x86/kernel/tsc.c12
-rw-r--r--arch/x86/kernel/tsc_sync.c4
-rw-r--r--arch/x86/kernel/vsmp_64.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c9
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c2
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/i8254.c13
-rw-r--r--arch/x86/kvm/i8254.h1
-rw-r--r--arch/x86/kvm/mmu.c5
-rw-r--r--arch/x86/kvm/paging_tmpl.h1
-rw-r--r--arch/x86/kvm/vmx.c7
-rw-r--r--arch/x86/kvm/vmx.h1
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--arch/x86/lguest/boot.c35
-rw-r--r--arch/x86/mach-voyager/setup.c2
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c30
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/gup.c2
-rw-r--r--arch/x86/mm/init_32.c5
-rw-r--r--arch/x86/mm/init_64.c75
-rw-r--r--arch/x86/mm/iomap_32.c59
-rw-r--r--arch/x86/mm/ioremap.c22
-rw-r--r--arch/x86/mm/numa_32.c35
-rw-r--r--arch/x86/mm/pageattr.c8
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/oprofile/nmi_int.c5
-rw-r--r--arch/x86/oprofile/op_model_ppro.c15
-rw-r--r--arch/x86/pci/fixup.c25
-rw-r--r--arch/x86/power/hibernate_32.c4
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c5
-rw-r--r--arch/x86/xen/mmu.c50
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
113 files changed, 1887 insertions, 1435 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 350bee1d54d..d4d4cb7629e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,7 +28,7 @@ config X86
select HAVE_KRETPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
- select HAVE_FTRACE
+ select HAVE_FUNCTION_TRACER
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
@@ -167,9 +167,12 @@ config GENERIC_PENDING_IRQ
config X86_SMP
bool
depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
- select USE_GENERIC_SMP_HELPERS
default y
+config USE_GENERIC_SMP_HELPERS
+ def_bool y
+ depends on SMP
+
config X86_32_SMP
def_bool y
depends on X86_32 && SMP
@@ -231,25 +234,21 @@ config SMP
If you don't know what to do here, say N.
+config X86_HAS_BOOT_CPU_ID
+ def_bool y
+ depends on X86_VOYAGER
+
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
-if ACPI
config X86_MPPARSE
- def_bool y
- bool "Enable MPS table"
+ bool "Enable MPS table" if ACPI
+ default y
depends on X86_LOCAL_APIC
help
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-endif
-
-if !ACPI
-config X86_MPPARSE
- def_bool y
- depends on X86_LOCAL_APIC
-endif
choice
prompt "Subarchitecture Type"
@@ -1490,7 +1489,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool X86_64
depends on NUMA
-menu "Power management options"
+menu "Power management and ACPI options"
depends on !X86_VOYAGER
config ARCH_HIBERNATION_HEADER
@@ -1890,6 +1889,10 @@ config SYSVIPC_COMPAT
endmenu
+config HAVE_ATOMIC_IOMAP
+ def_bool y
+ depends on X86_32
+
source "net/Kconfig"
source "drivers/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 0b7c4a3f065..b815664fe37 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -513,19 +513,19 @@ config CPU_SUP_UMC_32
If unsure, say N.
config X86_DS
- bool "Debug Store support"
- default y
- help
- Add support for Debug Store.
- This allows the kernel to provide a memory buffer to the hardware
- to store various profiling and tracing events.
+ def_bool X86_PTRACE_BTS
+ depends on X86_DEBUGCTLMSR
config X86_PTRACE_BTS
- bool "ptrace interface to Branch Trace Store"
+ bool "Branch Trace Store"
default y
- depends on (X86_DS && X86_DEBUGCTLMSR)
+ depends on X86_DEBUGCTLMSR
help
- Add a ptrace interface to allow collecting an execution trace
- of the traced task.
- This collects control flow changes in a (cyclic) buffer and allows
- debuggers to fill in the gaps and show an execution trace of the debuggee.
+ This adds a ptrace interface to the hardware's branch trace store.
+
+ Debuggers may use it to collect an execution trace of the debugged
+ application in order to answer the question 'how did I get here?'.
+ Debuggers may trace user mode as well as kernel mode.
+
+ Say Y unless there is no application development on this machine
+ and you want to save a small amount of code size.
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index be0ed065249..63eff3b04d0 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1 +1,3 @@
relocs
+vmlinux.bin.all
+vmlinux.relocs
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328..7e8e8b25f5f 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
{
u8 pending;
asm volatile("int $0x16; setnz %0"
- : "=rm" (pending)
+ : "=qm" (pending)
: "a" (0x0100));
return pending;
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 1267977e770..9ddf2fa0129 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -567,11 +567,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
- /* Make -mregparm=3 work */
- regs->ax = sig;
- regs->dx = (unsigned long) &frame->info;
- regs->cx = (unsigned long) &frame->uc;
-
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d676d8ecde..9830681446a 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void)
acpi_pci_disabled = 1;
acpi_noirq_set();
}
-extern int acpi_irq_balance_set(char *str);
/* routines for saving/restoring kernel state */
extern int acpi_save_state_mem(void);
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1a30c0440c6..ac302a2fa33 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -251,13 +251,6 @@ struct amd_iommu {
/* Pointer to PCI device of this IOMMU */
struct pci_dev *dev;
- /*
- * Capability pointer. There could be more than one IOMMU per PCI
- * device function if there are more than one AMD IOMMU capability
- * pointers.
- */
- u16 cap_ptr;
-
/* physical address of MMIO space */
u64 mmio_phys;
/* virtual address of MMIO space */
@@ -266,6 +259,13 @@ struct amd_iommu {
/* capabilities of that IOMMU read from ACPI */
u32 cap;
+ /*
+ * Capability pointer. There could be more than one IOMMU per PCI
+ * device function if there are more than one AMD IOMMU capability
+ * pointers.
+ */
+ u16 cap_ptr;
+
/* pci domain of this IOMMU */
u16 pci_seg;
@@ -284,19 +284,19 @@ struct amd_iommu {
/* size of command buffer */
u32 cmd_buf_size;
- /* event buffer virtual address */
- u8 *evt_buf;
/* size of event buffer */
u32 evt_buf_size;
+ /* event buffer virtual address */
+ u8 *evt_buf;
/* MSI number for event interrupt */
u16 evt_msi_num;
- /* if one, we need to send a completion wait command */
- int need_sync;
-
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
+ /* if one, we need to send a completion wait command */
+ int need_sync;
+
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
};
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 36001032271..9fa9dcdf344 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
*/
static inline void change_bit(int nr, volatile unsigned long *addr)
{
- asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
+ if (IS_IMMEDIATE(nr)) {
+ asm volatile(LOCK_PREFIX "xorb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((u8)CONST_MASK(nr)));
+ } else {
+ asm volatile(LOCK_PREFIX "btc %1,%0"
+ : BITOP_ADDR(addr)
+ : "Ir" (nr));
+ }
}
/**
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index e02ae2d89ac..f110ad417df 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -4,26 +4,33 @@
#include <asm/types.h>
#include <linux/compiler.h>
-#ifdef __GNUC__
+#define __LITTLE_ENDIAN
-#ifdef __i386__
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
+static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
{
-#ifdef CONFIG_X86_BSWAP
- asm("bswap %0" : "=r" (x) : "0" (x));
-#else
+#ifdef __i386__
+# ifdef CONFIG_X86_BSWAP
+ asm("bswap %0" : "=r" (val) : "0" (val));
+# else
asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
"rorl $16,%0\n\t" /* swap words */
"xchgb %b0,%h0" /* swap higher bytes */
- : "=q" (x)
- : "0" (x));
+ : "=q" (val)
+ : "0" (val));
+# endif
+
+#else /* __i386__ */
+ asm("bswapl %0"
+ : "=r" (val)
+ : "0" (val));
#endif
- return x;
+ return val;
}
+#define __arch_swab32 __arch_swab32
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
+static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
{
+#ifdef __i386__
union {
struct {
__u32 a;
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
__u64 u;
} v;
v.u = val;
-#ifdef CONFIG_X86_BSWAP
+# ifdef CONFIG_X86_BSWAP
asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-#else
- v.s.a = ___arch__swab32(v.s.a);
- v.s.b = ___arch__swab32(v.s.b);
+# else
+ v.s.a = __arch_swab32(v.s.a);
+ v.s.b = __arch_swab32(v.s.b);
asm("xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-#endif
+# endif
return v.u;
-}
-
#else /* __i386__ */
-
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
-{
asm("bswapq %0"
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
-{
- asm("bswapl %0"
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
+ : "=r" (val)
+ : "0" (val));
+ return val;
#endif
+}
+#define __arch_swab64 __arch_swab64
-/* Do not define swab16. Gcc is smart enough to recognize "C" version and
- convert it into rotation or exhange. */
-
-#define __arch__swab64(x) ___arch__swab64(x)
-#define __arch__swab32(x) ___arch__swab32(x)
-
-#define __BYTEORDER_HAS_U64__
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/little_endian.h>
+#include <linux/byteorder.h>
#endif /* _ASM_X86_BYTEORDER_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index f73e95d75b4..cfdf8c2c5c3 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -91,7 +91,7 @@
#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */
#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */
-#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */
+#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4a5397bfce2..097794ff6b7 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
/* Make sure we keep the same behaviour */
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
-#ifdef CONFIG_X86_32
- return 0;
-#else
+#ifdef CONFIG_X86_64
struct dma_mapping_ops *ops = get_dma_ops(dev);
if (ops->mapping_error)
return ops->mapping_error(dev, dma_addr);
- return (dma_addr == bad_dma_address);
#endif
+ return (dma_addr == bad_dma_address);
}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
@@ -255,9 +253,11 @@ static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
{
-#ifdef CONFIG_X86_64
unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
+ if (dma_mask <= DMA_24BIT_MASK)
+ gfp |= GFP_DMA;
+#ifdef CONFIG_X86_64
if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA))
gfp |= GFP_DMA32;
#endif
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf4..a95008457ea 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -23,12 +23,13 @@
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
-#ifdef CONFIG_X86_DS
#include <linux/types.h>
#include <linux/init.h>
+#ifdef CONFIG_X86_DS
+
struct task_struct;
/*
@@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context);
#else /* CONFIG_X86_DS */
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be92..3afc5e87cfd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
#endif
/*
- Macros for dwarf2 CFI unwind table entries.
- See "as.info" for details on these pseudo ops. Unfortunately
- they are only supported in very new binutils, so define them
- away for older version.
+ * Macros for dwarf2 CFI unwind table entries.
+ * See "as.info" for details on these pseudo ops. Unfortunately
+ * they are only supported in very new binutils, so define them
+ * away for older version.
*/
#ifdef CONFIG_AS_CFI
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
-#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
-#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
-#define CFI_OFFSET .cfi_offset
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_REGISTER .cfi_register
-#define CFI_RESTORE .cfi_restore
-#define CFI_REMEMBER_STATE .cfi_remember_state
-#define CFI_RESTORE_STATE .cfi_restore_state
-#define CFI_UNDEFINED .cfi_undefined
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_REMEMBER_STATE .cfi_remember_state
+#define CFI_RESTORE_STATE .cfi_restore_state
+#define CFI_UNDEFINED .cfi_undefined
#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
+#define CFI_SIGNAL_FRAME .cfi_signal_frame
#else
#define CFI_SIGNAL_FRAME
#endif
#else
-/* Due to the structure of pre-exisiting code, don't use assembler line
- comment character # to ignore the arguments. Instead, use a dummy macro. */
+/*
+ * Due to the structure of pre-exisiting code, don't use assembler line
+ * comment character # to ignore the arguments. Instead, use a dummy macro.
+ */
.macro cfi_ignore a=0, b=0, c=0, d=0
.endm
-#define CFI_STARTPROC cfi_ignore
-#define CFI_ENDPROC cfi_ignore
-#define CFI_DEF_CFA cfi_ignore
+#define CFI_STARTPROC cfi_ignore
+#define CFI_ENDPROC cfi_ignore
+#define CFI_DEF_CFA cfi_ignore
#define CFI_DEF_CFA_REGISTER cfi_ignore
#define CFI_DEF_CFA_OFFSET cfi_ignore
#define CFI_ADJUST_CFA_OFFSET cfi_ignore
-#define CFI_OFFSET cfi_ignore
-#define CFI_REL_OFFSET cfi_ignore
-#define CFI_REGISTER cfi_ignore
-#define CFI_RESTORE cfi_ignore
-#define CFI_REMEMBER_STATE cfi_ignore
-#define CFI_RESTORE_STATE cfi_ignore
-#define CFI_UNDEFINED cfi_ignore
-#define CFI_SIGNAL_FRAME cfi_ignore
+#define CFI_OFFSET cfi_ignore
+#define CFI_REL_OFFSET cfi_ignore
+#define CFI_REGISTER cfi_ignore
+#define CFI_RESTORE cfi_ignore
+#define CFI_REMEMBER_STATE cfi_ignore
+#define CFI_RESTORE_STATE cfi_ignore
+#define CFI_UNDEFINED cfi_ignore
+#define CFI_SIGNAL_FRAME cfi_ignore
#endif
+/*
+ * An attempt to make CFI annotations more or less
+ * correct and shorter. It is implied that you know
+ * what you're doing if you use them.
+ */
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+ .macro pushq_cfi reg
+ pushq \reg
+ CFI_ADJUST_CFA_OFFSET 8
+ .endm
+
+ .macro popq_cfi reg
+ popq \reg
+ CFI_ADJUST_CFA_OFFSET -8
+ .endm
+
+ .macro movq_cfi reg offset=0
+ movq %\reg, \offset(%rsp)
+ CFI_REL_OFFSET \reg, \offset
+ .endm
+
+ .macro movq_cfi_restore offset reg
+ movq \offset(%rsp), %\reg
+ CFI_RESTORE \reg
+ .endm
+#else /*!CONFIG_X86_64*/
+
+ /* 32bit defenitions are missed yet */
+
+#endif /*!CONFIG_X86_64*/
+#endif /*__ASSEMBLY__*/
+
#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 3ffc5a7bf66..39849346191 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -50,10 +50,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
-#if APIC_DEBUG
- #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid)
-#else
- #define inquire_remote_apic(apicid) {}
-#endif
+#define inquire_remote_apic(apicid) do { \
+ if (apic_verbosity >= APIC_DEBUG) \
+ __inquire_remote_apic(apicid); \
+ } while (0)
#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8668a94f850..23696d44a0a 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -9,6 +9,10 @@
extern int fixmaps_set;
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
void native_set_fixmap(enum fixed_addresses idx,
unsigned long phys, pgprot_t flags);
diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h
index 09f29ab5c13..c7115c1d721 100644
--- a/arch/x86/include/asm/fixmap_32.h
+++ b/arch/x86/include/asm/fixmap_32.h
@@ -28,10 +28,8 @@ extern unsigned long __FIXADDR_TOP;
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
-#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
-#endif
/*
* Here we define all the compile-time 'special' virtual
@@ -75,10 +73,8 @@ enum fixed_addresses {
#ifdef CONFIG_X86_CYCLONE_TIMER
FIX_CYCLONE_TIMER, /*cyclone timer register*/
#endif
-#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
#ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
#endif
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 47f7e65e6c1..9e8bc29b8b1 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_ADDR ((long)(mcount))
#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
@@ -19,6 +19,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
}
#endif
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index a3b3b7c3027..bf9276bea66 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -25,14 +25,11 @@
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
+#include <asm/fixmap.h>
/* declarations for highmem.c */
extern unsigned long highstart_pfn, highend_pfn;
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
-extern pte_t *pkmap_page_table;
-
/*
* Right now we initialize only a single pte table. It can be extended
* easily, subsequent pte tables have to be allocated in one physical
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61..8de644b6b95 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
#endif
#endif
-#ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_VECTORS])(void);
-#endif
+extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 5618a103f39..ac2abc88cd9 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -82,9 +82,9 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
extern void early_ioremap_init(void);
extern void early_ioremap_clear(void);
extern void early_ioremap_reset(void);
-extern void *early_ioremap(unsigned long offset, unsigned long size);
-extern void *early_memremap(unsigned long offset, unsigned long size);
-extern void early_iounmap(void *addr, unsigned long size);
+extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
+extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
new file mode 100644
index 00000000000..c1f06289b14
--- /dev/null
+++ b/arch/x86/include/asm/iomap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type);
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 98e28ea8cd1..0b500c5b644 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,8 +6,6 @@ extern void no_iommu_init(void);
extern struct dma_mapping_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern int dmar_disabled;
-extern int forbid_dac;
extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda9548..28e409fc73f 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
# endif
#endif
-#ifdef CONFIG_IRQBALANCE
-extern int irqbalance_disable(char *str);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
extern void fixup_irqs(cpumask_t map);
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index af2f02d27fc..86afd747345 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -9,6 +9,8 @@
#include <asm/percpu.h>
+#define ARCH_HAS_OWN_IRQ_REGS
+
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index d843ed0e9b2..0005adb0f94 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,30 +101,22 @@
#define LAST_VM86_IRQ 15
#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
# if NR_CPUS < MAX_IO_APICS
# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
# else
# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
# endif
-#elif !defined(CONFIG_X86_VOYAGER)
+#elif defined(CONFIG_X86_VOYAGER)
-# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
-
-# define NR_IRQS 224
-
-# else /* IO_APIC || PARAVIRT */
-
-# define NR_IRQS 16
-
-# endif
+# define NR_IRQS 224
-#else /* !VISWS && !VOYAGER */
+#else /* IO_APIC || VOYAGER */
-# define NR_IRQS 224
+# define NR_IRQS 16
-#endif /* VISWS */
+#endif
/* Voyager specific defines */
/* These define the CPIs we use in linux */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 65679d00633..8346be87cfa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -364,6 +364,9 @@ struct kvm_arch{
struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
+
+ unsigned long irq_sources_bitmap;
+ unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
};
struct kvm_vm_stat {
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e..5d98d0b68ff 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
#define __ALIGN_STR ".align 16,0x90"
#endif
+/*
+ * to check ENTRY_X86/END_X86 and
+ * KPROBE_ENTRY_X86/KPROBE_END_X86
+ * unbalanced-missed-mixed appearance
+ */
+#define __set_entry_x86 .set ENTRY_X86_IN, 0
+#define __unset_entry_x86 .set ENTRY_X86_IN, 1
+#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
+#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
+
+#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
+
+#define __check_entry_x86 \
+ .ifdef ENTRY_X86_IN; \
+ .ifeq ENTRY_X86_IN; \
+ __macro_err_x86; \
+ .abort; \
+ .endif; \
+ .endif
+
+#define __check_kprobe_x86 \
+ .ifdef KPROBE_X86_IN; \
+ .ifeq KPROBE_X86_IN; \
+ __macro_err_x86; \
+ .abort; \
+ .endif; \
+ .endif
+
+#define __check_entry_kprobe_x86 \
+ __check_entry_x86; \
+ __check_kprobe_x86
+
+#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
+
+#define ENTRY_X86(name) \
+ __check_entry_kprobe_x86; \
+ __set_entry_x86; \
+ .globl name; \
+ __ALIGN; \
+ name:
+
+#define END_X86(name) \
+ __unset_entry_x86; \
+ __check_entry_kprobe_x86; \
+ .size name, .-name
+
+#define KPROBE_ENTRY_X86(name) \
+ __check_entry_kprobe_x86; \
+ __set_kprobe_x86; \
+ .pushsection .kprobes.text, "ax"; \
+ .globl name; \
+ __ALIGN; \
+ name:
+
+#define KPROBE_END_X86(name) \
+ __unset_kprobe_x86; \
+ __check_entry_kprobe_x86; \
+ .size name, .-name; \
+ .popsection
+
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index d5c0b826a4f..9d80db91e99 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -33,10 +33,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
-#if APIC_DEBUG
- #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid)
-#else
- #define inquire_remote_apic(apicid) {}
-#endif
+#define inquire_remote_apic(apicid) do { \
+ if (apic_verbosity >= APIC_DEBUG) \
+ __inquire_remote_apic(apicid); \
+ } while (0)
#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 485bdf059ff..07f1af494ca 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)
extern int early_pfn_to_nid(unsigned long pfn);
+extern void resume_map_numa_kva(pgd_t *pgd);
+
#else /* !CONFIG_NUMA */
#define get_memcfg_numa get_memcfg_numa_flat
+static inline void resume_map_numa_kva(pgd_t *pgd) {}
+
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 46be2fa7ac2..c2a812ebde8 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void)
{
DECLARE_ARGS(val, low, high);
- rdtsc_barrier();
asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
- rdtsc_barrier();
return EAX_EDX_VAL(val, low, high);
}
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995d664..d02d936840a 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
*/
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
-
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
dma_addr_t ADDR_NAME;
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL))
-#else
-/* No IOMMU */
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index fb16cec702e..52597aeadff 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -120,13 +120,13 @@ static inline void pud_clear(pud_t *pudp)
write_cr3(pgd);
}
-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK))
+#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
/* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
+#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \
pmd_index(address))
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8480b..eefb0594b05 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-#define __ARCH_WANT_COMPAT_SYS_PTRACE
-
#endif /* __KERNEL__ */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2766021aef8..d12811ce51d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -225,5 +225,11 @@ static inline int hard_smp_processor_id(void)
#endif /* CONFIG_X86_LOCAL_APIC */
+#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
+extern unsigned char boot_cpu_id;
+#else
+#define boot_cpu_id 0
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 90ac7718469..ff386ff50ed 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -154,7 +154,7 @@ extern unsigned long node_remap_size[];
#endif
-/* sched_domains SD_NODE_INIT for NUMAQ machines */
+/* sched_domains SD_NODE_INIT for NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
.min_interval = 8, \
.max_interval = 32, \
@@ -169,8 +169,9 @@ extern unsigned long node_remap_size[];
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_EXEC \
| SD_BALANCE_FORK \
- | SD_SERIALIZE \
- | SD_WAKE_BALANCE, \
+ | SD_WAKE_AFFINE \
+ | SD_WAKE_BALANCE \
+ | SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
}
@@ -238,7 +239,7 @@ struct pci_bus;
void set_pci_bus_resources_arch_default(struct pci_bus *b);
#ifdef CONFIG_SMP
-#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
+#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids)
#define smt_capable() (smp_num_siblings > 1)
#endif
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 664f15280f1..f8cfd00db45 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,7 +46,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
return ret;
case 10:
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 16);
+ ret, "q", "", "=r", 10);
if (unlikely(ret))
return ret;
__get_user_asm(*(u16 *)(8 + (char *)dst),
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1d89f..d2e415e6666 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
#define __NR_timerfd_gettime 287
__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept 288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4 288
+__SYSCALL(__NR_accept4, sys_accept4)
#define __NR_signalfd4 289
__SYSCALL(__NR_signalfd4, sys_signalfd4)
#define __NR_eventfd2 290
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index c6ad93e315c..7a5782610b2 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -13,6 +13,7 @@
#include <linux/numa.h>
#include <linux/percpu.h>
+#include <linux/timer.h>
#include <asm/types.h>
#include <asm/percpu.h>
diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h
index 9c811d2e6f9..b3e64730762 100644
--- a/arch/x86/include/asm/voyager.h
+++ b/arch/x86/include/asm/voyager.h
@@ -520,6 +520,7 @@ extern void voyager_restart(void);
extern void voyager_cat_power_off(void);
extern void voyager_cat_do_common_interrupt(void);
extern void voyager_handle_nmi(void);
+extern void voyager_smp_intr_init(void);
/* Commands for the following are */
#define VOYAGER_PSI_READ 0
#define VOYAGER_PSI_WRITE 1
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ef28c210ebf..3d4346a73a8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,11 +6,13 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
endif
#
@@ -40,7 +42,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
-obj-y += ds.o
+obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9..4c51a2f8fd3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void)
error = acpi_parse_madt_ioapic_entries();
if (!error) {
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
- acpi_irq_balance_set(NULL);
acpi_ioapic = 1;
smp_found_config = 1;
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a8fd9ebdc8e..5662e226b0c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
/* returns !0 if the IOMMU is caching non-present entries in its TLB */
static int iommu_has_npcache(struct amd_iommu *iommu)
{
- return iommu->cap & IOMMU_CAP_NPCACHE;
+ return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
}
/****************************************************************************
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
+ if (!ret)
+ iommu->need_sync = 1;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
- iommu->need_sync = 0;
-
spin_lock_irqsave(&iommu->lock, flags);
+ if (!iommu->need_sync)
+ goto out;
+
+ iommu->need_sync = 0;
+
ret = __iommu_queue_command(iommu, &cmd);
if (ret)
@@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -536,6 +537,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
{
address >>= PAGE_SHIFT;
iommu_area_free(dom->bitmap, address, pages);
+
+ if (address >= dom->next_bit)
+ dom->need_flush = true;
}
/****************************************************************************
@@ -759,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu,
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
iommu_queue_inv_dev_entry(iommu, devid);
-
- iommu->need_sync = 1;
}
/*****************************************************************************
@@ -855,6 +857,9 @@ static int get_device_resources(struct device *dev,
print_devid(_bdf, 1);
}
+ if (domain_for_device(_bdf) == NULL)
+ set_device_domain(*iommu, *domain, _bdf);
+
return 1;
}
@@ -992,8 +997,10 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
- if (amd_iommu_unmap_flush)
+ if (amd_iommu_unmap_flush || dma_dom->need_flush) {
iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+ dma_dom->need_flush = false;
+ }
}
/*
@@ -1026,8 +1033,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
if (addr == bad_dma_address)
goto out;
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1055,8 +1061,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1122,8 +1127,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto unmap;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1168,8 +1172,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
s->dma_address = s->dma_length = 0;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1220,8 +1223,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
goto out;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1252,8 +1254,7 @@ static void free_coherent(struct device *dev, size_t size,
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05..30ae2701b3d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate; /* if 1, device isolation is enabled */
+int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -1213,7 +1213,9 @@ static int __init parse_amd_iommu_options(char *str)
for (; *str; ++str) {
if (strncmp(str, "isolate", 7) == 0)
amd_iommu_isolate = 1;
- if (strncmp(str, "fullflush", 11) == 0)
+ if (strncmp(str, "share", 5) == 0)
+ amd_iommu_isolate = 0;
+ if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc..16f94879b52 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1315,7 +1315,7 @@ void enable_x2apic(void)
}
}
-void enable_IR_x2apic(void)
+void __init enable_IR_x2apic(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 0d9c993aa93..ef8f831af82 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
*/
void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 25581dcb280..b9c9ea0217a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -20,6 +20,7 @@
#include <asm/pat.h>
#include <asm/asm.h>
#include <asm/numa.h>
+#include <asm/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
@@ -549,6 +550,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_early_init(c);
validate_pat_support(c);
+
+#ifdef CONFIG_SMP
+ c->cpu_index = boot_cpu_id;
+#endif
}
void __init early_cpu_init(void)
@@ -1134,7 +1139,7 @@ void __cpuinit cpu_init(void)
/*
* Boot processor to setup the FP and extended state context info.
*/
- if (!smp_processor_id())
+ if (smp_processor_id() == boot_cpu_id)
init_thread_xstate();
xsave_init();
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58b87c..7f05f44b97e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 i = 0;
if (cpu_family == CPU_HW_PSTATE) {
- rdmsr(MSR_PSTATE_STATUS, lo, hi);
- i = lo & HW_PSTATE_MASK;
- data->currpstate = i;
+ if (data->currpstate == HW_PSTATE_INVALID) {
+ /* read (initial) hw pstate if not yet set */
+ rdmsr(MSR_PSTATE_STATUS, lo, hi);
+ i = lo & HW_PSTATE_MASK;
+
+ /*
+ * a workaround for family 11h erratum 311 might cause
+ * an "out-of-range Pstate if the core is in Pstate-0
+ */
+ if (i >= data->numps)
+ data->currpstate = HW_PSTATE_0;
+ else
+ data->currpstate = i;
+ }
return 0;
}
do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
}
data->cpu = pol->cpu;
+ data->currpstate = HW_PSTATE_INVALID;
if (powernow_k8_cpu_init_acpi(data)) {
/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d9..65cfb5d7f77 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
* http://www.gnu.org/licenses/gpl.html
*/
+
+enum pstate {
+ HW_PSTATE_INVALID = 0xff,
+ HW_PSTATE_0 = 0,
+ HW_PSTATE_1 = 1,
+ HW_PSTATE_2 = 2,
+ HW_PSTATE_3 = 3,
+ HW_PSTATE_4 = 4,
+ HW_PSTATE_5 = 5,
+ HW_PSTATE_6 = 6,
+ HW_PSTATE_7 = 7,
+};
+
struct powernow_k8_data {
unsigned int cpu;
@@ -23,7 +36,9 @@ struct powernow_k8_data {
u32 exttype; /* extended interface = 1 */
/* keep track of the current fid / vid or pstate */
- u32 currvid, currfid, currpstate;
+ u32 currvid;
+ u32 currfid;
+ enum pstate currpstate;
/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a..a2d1176c38e 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -21,8 +21,6 @@
*/
-#ifdef CONFIG_X86_DS
-
#include <asm/ds.h>
#include <linux/errno.h>
@@ -211,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context);
static inline struct ds_context *ds_get_context(struct task_struct *task)
{
struct ds_context *context;
+ unsigned long irq;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
context = (task ? task->thread.ds_ctx : this_system_context);
if (context)
context->count++;
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
return context;
}
@@ -226,18 +225,16 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
/*
* Same as ds_get_context, but allocates the context and it's DS
* structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
*/
static inline struct ds_context *ds_alloc_context(struct task_struct *task)
{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &this_system_context);
struct ds_context *context = *p_context;
+ unsigned long irq;
if (!context) {
context = kzalloc(sizeof(*context), GFP_KERNEL);
-
if (!context)
return NULL;
@@ -247,18 +244,27 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
return NULL;
}
- *p_context = context;
+ spin_lock_irqsave(&ds_lock, irq);
- context->this = p_context;
- context->task = task;
+ if (*p_context) {
+ kfree(context->ds);
+ kfree(context);
+
+ context = *p_context;
+ } else {
+ *p_context = context;
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+ context->this = p_context;
+ context->task = task;
- if (!task || (task == current))
- wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
- get_tracer(task);
+ if (!task || (task == current))
+ wrmsrl(MSR_IA32_DS_AREA,
+ (unsigned long)context->ds);
+ }
+ spin_unlock_irqrestore(&ds_lock, irq);
}
context->count++;
@@ -272,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
*/
static inline void ds_put_context(struct ds_context *context)
{
+ unsigned long irq;
+
if (!context)
return;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
if (--context->count)
goto out;
@@ -297,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context)
kfree(context->ds);
kfree(context);
out:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
}
@@ -368,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
struct ds_context *context;
unsigned long buffer, adj;
const unsigned long alignment = (1 << 3);
+ unsigned long irq;
int error = 0;
if (!ds_cfg.sizeof_ds)
@@ -382,25 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
return -EOPNOTSUPP;
- spin_lock(&ds_lock);
-
- if (!check_tracer(task))
- return -EPERM;
-
- error = -ENOMEM;
context = ds_alloc_context(task);
if (!context)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&ds_lock, irq);
+
+ error = -EPERM;
+ if (!check_tracer(task))
goto out_unlock;
+ get_tracer(task);
+
error = -EALREADY;
if (context->owner[qual] == current)
- goto out_unlock;
+ goto out_put_tracer;
error = -EPERM;
if (context->owner[qual] != NULL)
- goto out_unlock;
+ goto out_put_tracer;
context->owner[qual] = current;
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
error = -ENOMEM;
@@ -448,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
out_release:
context->owner[qual] = NULL;
ds_put_context(context);
+ put_tracer(task);
+ return error;
+
+ out_put_tracer:
+ spin_unlock_irqrestore(&ds_lock, irq);
+ ds_put_context(context);
+ put_tracer(task);
return error;
out_unlock:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
return error;
}
@@ -801,13 +819,21 @@ static const struct ds_configuration ds_cfg_var = {
.sizeof_ds = sizeof(long) * 12,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10
+#else
+ .sizeof_rec[ds_pebs] = sizeof(long) * 18
+#endif
};
static const struct ds_configuration ds_cfg_64 = {
.sizeof_ds = 8 * 12,
.sizeof_field = 8,
.sizeof_rec[ds_bts] = 8 * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = 8 * 10
+#else
+ .sizeof_rec[ds_pebs] = 8 * 18
+#endif
};
static inline void
@@ -861,4 +887,3 @@ void ds_free(struct ds_context *context)
while (leftovers--)
ds_put_context(context);
}
-#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ce97bf3bed1..7aafeb5263e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1290,15 +1290,17 @@ void __init e820_reserve_resources(void)
res->start = e820.map[i].addr;
res->end = end;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_MEM;
/*
* don't register the region that could be conflicted with
* pci device BAR resource and insert them later in
* pcibios_resource_survey()
*/
- if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20))
+ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
+ res->flags |= IORESOURCE_BUSY;
insert_resource(&iomem_resource, res);
+ }
res++;
}
@@ -1318,7 +1320,7 @@ void __init e820_reserve_resources_late(void)
res = e820_res;
for (i = 0; i < e820.nr_map; i++) {
if (!res->parent && res->end)
- reserve_region_with_split(&iomem_resource, res->start, res->end, res->name);
+ insert_resource_expand_to_fit(&iomem_resource, res);
res++;
}
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3ce029ffaa5..1b894b72c0f 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
-#ifdef CONFIG_DMAR
-static void __init intel_g33_dmar(int num, int slot, int func)
-{
- struct acpi_table_header *dmar_tbl;
- acpi_status status;
-
- status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
- if (ACPI_SUCCESS(status)) {
- printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
- dmar_disabled = 1;
- }
-}
-#endif
-
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
-#ifdef CONFIG_DMAR
- { PCI_VENDOR_ID_INTEL, 0x29c0,
- PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
-#endif
{}
};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dd65143941a..fe7014176eb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
27:;
/*
- * Build the entry stubs and pointer table with
- * some assembler magic.
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
*/
-.section .rodata,"a"
+.section .init.rodata,"a"
ENTRY(interrupt)
.text
-
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
RING0_INT_FRAME
-vector=0
-.rept NR_VECTORS
- ALIGN
- .if vector
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
- .endif
-1: pushl $~(vector)
+ .endif
+1: pushl $(~vector+0x80) /* Note: always in signed byte range */
CFI_ADJUST_CFA_OFFSET 4
- jmp common_interrupt
- .previous
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+ jmp 2f
+ .endif
+ .previous
.long 1b
- .text
+ .text
vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
.endr
END(irq_entries_start)
@@ -652,8 +661,9 @@ END(interrupt)
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
*/
- ALIGN
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
movl %esp,%eax
@@ -678,65 +688,6 @@ ENDPROC(name)
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
-KPROBE_ENTRY(page_fault)
- RING0_EC_FRAME
- pushl $do_page_fault
- CFI_ADJUST_CFA_OFFSET 4
- ALIGN
-error_code:
- /* the function address is in %fs's slot on the stack */
- pushl %es
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET es, 0*/
- pushl %ds
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET ds, 0*/
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET eax, 0
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ebp, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET esi, 0
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edx, 0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ebx, 0
- cld
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET fs, 0*/
- movl $(__KERNEL_PERCPU), %ecx
- movl %ecx, %fs
- UNWIND_ESPFIX_STACK
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- /*CFI_REGISTER es, ecx*/
- movl PT_FS(%esp), %edi # get the function address
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
- mov %ecx, PT_FS(%esp)
- /*CFI_REL_OFFSET fs, ES*/
- movl $(__USER_DS), %ecx
- movl %ecx, %ds
- movl %ecx, %es
- TRACE_IRQS_OFF
- movl %esp,%eax # pt_regs pointer
- call *%edi
- jmp ret_from_exception
- CFI_ENDPROC
-KPROBE_END(page_fault)
-
ENTRY(coprocessor_error)
RING0_INT_FRAME
pushl $0
@@ -767,140 +718,6 @@ ENTRY(device_not_available)
CFI_ENDPROC
END(device_not_available)
-/*
- * Debug traps and NMI can happen at the one SYSENTER instruction
- * that sets up the real kernel stack. Check here, since we can't
- * allow the wrong stack to be used.
- *
- * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
- * already pushed 3 words if it hits on the sysenter instruction:
- * eflags, cs and eip.
- *
- * We just load the right stack, and push the three (known) values
- * by hand onto the new stack - while updating the return eip past
- * the instruction that would have done it for sysenter.
- */
-#define FIX_STACK(offset, ok, label) \
- cmpw $__KERNEL_CS,4(%esp); \
- jne ok; \
-label: \
- movl TSS_sysenter_sp0+offset(%esp),%esp; \
- CFI_DEF_CFA esp, 0; \
- CFI_UNDEFINED eip; \
- pushfl; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl $__KERNEL_CS; \
- CFI_ADJUST_CFA_OFFSET 4; \
- pushl $sysenter_past_esp; \
- CFI_ADJUST_CFA_OFFSET 4; \
- CFI_REL_OFFSET eip, 0
-
-KPROBE_ENTRY(debug)
- RING0_INT_FRAME
- cmpl $ia32_sysenter_target,(%esp)
- jne debug_stack_correct
- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
-debug_stack_correct:
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- TRACE_IRQS_OFF
- xorl %edx,%edx # error code 0
- movl %esp,%eax # pt_regs pointer
- call do_debug
- jmp ret_from_exception
- CFI_ENDPROC
-KPROBE_END(debug)
-
-/*
- * NMI is doubly nasty. It can happen _while_ we're handling
- * a debug fault, and the debug fault hasn't yet been able to
- * clear up the stack. So we first check whether we got an
- * NMI on the sysenter entry path, but after that we need to
- * check whether we got an NMI on the debug path where the debug
- * fault happened on the sysenter path.
- */
-KPROBE_ENTRY(nmi)
- RING0_INT_FRAME
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- movl %ss, %eax
- cmpw $__ESPFIX_SS, %ax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
- je nmi_espfix_stack
- cmpl $ia32_sysenter_target,(%esp)
- je nmi_stack_fixup
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- movl %esp,%eax
- /* Do not access memory above the end of our stack page,
- * it might not exist.
- */
- andl $(THREAD_SIZE-1),%eax
- cmpl $(THREAD_SIZE-20),%eax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
- jae nmi_stack_correct
- cmpl $ia32_sysenter_target,12(%esp)
- je nmi_debug_stack_check
-nmi_stack_correct:
- /* We have a RING0_INT_FRAME here */
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- TRACE_IRQS_OFF
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_nmi
- jmp restore_nocheck_notrace
- CFI_ENDPROC
-
-nmi_stack_fixup:
- RING0_INT_FRAME
- FIX_STACK(12,nmi_stack_correct, 1)
- jmp nmi_stack_correct
-
-nmi_debug_stack_check:
- /* We have a RING0_INT_FRAME here */
- cmpw $__KERNEL_CS,16(%esp)
- jne nmi_stack_correct
- cmpl $debug,(%esp)
- jb nmi_stack_correct
- cmpl $debug_esp_fix_insn,(%esp)
- ja nmi_stack_correct
- FIX_STACK(24,nmi_stack_correct, 1)
- jmp nmi_stack_correct
-
-nmi_espfix_stack:
- /* We have a RING0_INT_FRAME here.
- *
- * create the pointer to lss back
- */
- pushl %ss
- CFI_ADJUST_CFA_OFFSET 4
- pushl %esp
- CFI_ADJUST_CFA_OFFSET 4
- addw $4, (%esp)
- /* copy the iret frame of 12 bytes */
- .rept 3
- pushl 16(%esp)
- CFI_ADJUST_CFA_OFFSET 4
- .endr
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- TRACE_IRQS_OFF
- FIXUP_ESPFIX_STACK # %eax == %esp
- xorl %edx,%edx # zero error code
- call do_nmi
- RESTORE_REGS
- lss 12+4(%esp), %esp # back to espfix stack
- CFI_ADJUST_CFA_OFFSET -24
- jmp irq_return
- CFI_ENDPROC
-KPROBE_END(nmi)
-
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iret
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit)
END(native_irq_enable_sysexit)
#endif
-KPROBE_ENTRY(int3)
- RING0_INT_FRAME
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
- TRACE_IRQS_OFF
- xorl %edx,%edx # zero error code
- movl %esp,%eax # pt_regs pointer
- call do_int3
- jmp ret_from_exception
- CFI_ENDPROC
-KPROBE_END(int3)
-
ENTRY(overflow)
RING0_INT_FRAME
pushl $0
@@ -993,14 +797,6 @@ ENTRY(stack_segment)
CFI_ENDPROC
END(stack_segment)
-KPROBE_ENTRY(general_protection)
- RING0_EC_FRAME
- pushl $do_general_protection
- CFI_ADJUST_CFA_OFFSET 4
- jmp error_code
- CFI_ENDPROC
-KPROBE_END(general_protection)
-
ENTRY(alignment_check)
RING0_EC_FRAME
pushl $do_alignment_check
@@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper)
push %eax
CFI_ADJUST_CFA_OFFSET 4
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
@@ -1149,7 +946,7 @@ ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN */
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
@@ -1204,9 +1001,233 @@ trace:
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
.section .rodata,"a"
#include "syscall_table_32.S"
syscall_table_size=(.-sys_call_table)
+
+/*
+ * Some functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+
+ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
+ ALIGN
+error_code:
+ /* the function address is in %fs's slot on the stack */
+ pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
+ pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0*/
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
+ pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
+ pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
+ pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
+ pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
+ cld
+ pushl %fs
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET fs, 0*/
+ movl $(__KERNEL_PERCPU), %ecx
+ movl %ecx, %fs
+ UNWIND_ESPFIX_STACK
+ popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_REGISTER es, ecx*/
+ movl PT_FS(%esp), %edi # get the function address
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+ mov %ecx, PT_FS(%esp)
+ /*CFI_REL_OFFSET fs, ES*/
+ movl $(__USER_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+ TRACE_IRQS_OFF
+ movl %esp,%eax # pt_regs pointer
+ call *%edi
+ jmp ret_from_exception
+ CFI_ENDPROC
+END(page_fault)
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label) \
+ cmpw $__KERNEL_CS,4(%esp); \
+ jne ok; \
+label: \
+ movl TSS_sysenter_sp0+offset(%esp),%esp; \
+ CFI_DEF_CFA esp, 0; \
+ CFI_UNDEFINED eip; \
+ pushfl; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ pushl $__KERNEL_CS; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ pushl $sysenter_past_esp; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ CFI_REL_OFFSET eip, 0
+
+ENTRY(debug)
+ RING0_INT_FRAME
+ cmpl $ia32_sysenter_target,(%esp)
+ jne debug_stack_correct
+ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx,%edx # error code 0
+ movl %esp,%eax # pt_regs pointer
+ call do_debug
+ jmp ret_from_exception
+ CFI_ENDPROC
+END(debug)
+
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl %ss, %eax
+ cmpw $__ESPFIX_SS, %ax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ je nmi_espfix_stack
+ cmpl $ia32_sysenter_target,(%esp)
+ je nmi_stack_fixup
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ movl %esp,%eax
+ /* Do not access memory above the end of our stack page,
+ * it might not exist.
+ */
+ andl $(THREAD_SIZE-1),%eax
+ cmpl $(THREAD_SIZE-20),%eax
+ popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
+ jae nmi_stack_correct
+ cmpl $ia32_sysenter_target,12(%esp)
+ je nmi_debug_stack_check
+nmi_stack_correct:
+ /* We have a RING0_INT_FRAME here */
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ jmp restore_nocheck_notrace
+ CFI_ENDPROC
+
+nmi_stack_fixup:
+ RING0_INT_FRAME
+ FIX_STACK(12,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
+
+nmi_debug_stack_check:
+ /* We have a RING0_INT_FRAME here */
+ cmpw $__KERNEL_CS,16(%esp)
+ jne nmi_stack_correct
+ cmpl $debug,(%esp)
+ jb nmi_stack_correct
+ cmpl $debug_esp_fix_insn,(%esp)
+ ja nmi_stack_correct
+ FIX_STACK(24,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
+
+nmi_espfix_stack:
+ /* We have a RING0_INT_FRAME here.
+ *
+ * create the pointer to lss back
+ */
+ pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %esp
+ CFI_ADJUST_CFA_OFFSET 4
+ addw $4, (%esp)
+ /* copy the iret frame of 12 bytes */
+ .rept 3
+ pushl 16(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
+ .endr
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ FIXUP_ESPFIX_STACK # %eax == %esp
+ xorl %edx,%edx # zero error code
+ call do_nmi
+ RESTORE_REGS
+ lss 12+4(%esp), %esp # back to espfix stack
+ CFI_ADJUST_CFA_OFFSET -24
+ jmp irq_return
+ CFI_ENDPROC
+END(nmi)
+
+ENTRY(int3)
+ RING0_INT_FRAME
+ pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ TRACE_IRQS_OFF
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_int3
+ jmp ret_from_exception
+ CFI_ENDPROC
+END(int3)
+
+ENTRY(general_protection)
+ RING0_EC_FRAME
+ pushl $do_general_protection
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+END(general_protection)
+
+/*
+ * End of kprobes section
+ */
+ .popsection
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 09e7145484c..3194636a429 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
- *
- * Normal syscalls and interrupts don't save a full stack frame, this is
+ *
+ * Normal syscalls and interrupts don't save a full stack frame, this is
* only done for syscall tracing, signals or fork/exec et.al.
- *
- * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
+ *
+ * A note on terminology:
+ * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
- * - full stack frame: Like partial stack frame, but all register saved.
+ * - full stack frame: Like partial stack frame, but all register saved.
*
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
@@ -60,8 +60,7 @@
#define __AUDIT_ARCH_LE 0x40000000
.code64
-
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
retq
@@ -138,11 +137,11 @@ trace:
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
-#endif
+#endif
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64)
.endm
/*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
+ * C code is not supposed to know about undefined top of stack. Every time
+ * a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
- */
-
- /* %rsp:at FRAMEEND */
- .macro FIXUP_TOP_OF_STACK tmp
- movq %gs:pda_oldrsp,\tmp
- movq \tmp,RSP(%rsp)
- movq $__USER_DS,SS(%rsp)
- movq $__USER_CS,CS(%rsp)
- movq $-1,RCX(%rsp)
- movq R11(%rsp),\tmp /* get eflags */
- movq \tmp,EFLAGS(%rsp)
+ */
+
+ /* %rsp:at FRAMEEND */
+ .macro FIXUP_TOP_OF_STACK tmp offset=0
+ movq %gs:pda_oldrsp,\tmp
+ movq \tmp,RSP+\offset(%rsp)
+ movq $__USER_DS,SS+\offset(%rsp)
+ movq $__USER_CS,CS+\offset(%rsp)
+ movq $-1,RCX+\offset(%rsp)
+ movq R11+\offset(%rsp),\tmp /* get eflags */
+ movq \tmp,EFLAGS+\offset(%rsp)
.endm
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
- movq RSP-\offset(%rsp),\tmp
- movq \tmp,%gs:pda_oldrsp
- movq EFLAGS-\offset(%rsp),\tmp
- movq \tmp,R11-\offset(%rsp)
+ .macro RESTORE_TOP_OF_STACK tmp offset=0
+ movq RSP+\offset(%rsp),\tmp
+ movq \tmp,%gs:pda_oldrsp
+ movq EFLAGS+\offset(%rsp),\tmp
+ movq \tmp,R11+\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64)
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rsp,0
- pushq $(1<<9) /* eflags - interrupts on */
+ pushq $X86_EFLAGS_IF /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
@@ -213,62 +212,184 @@ ENTRY(native_usergs_sysret64)
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
- .macro CFI_DEFAULT_STACK start=1
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+ .macro EMPTY_FRAME start=1 offset=0
.if \start
- CFI_STARTPROC simple
+ CFI_STARTPROC simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8
+ CFI_DEF_CFA rsp,8+\offset
.else
- CFI_DEF_CFA_OFFSET SS+8
+ CFI_DEF_CFA_OFFSET 8+\offset
.endif
- CFI_REL_OFFSET r15,R15
- CFI_REL_OFFSET r14,R14
- CFI_REL_OFFSET r13,R13
- CFI_REL_OFFSET r12,R12
- CFI_REL_OFFSET rbp,RBP
- CFI_REL_OFFSET rbx,RBX
- CFI_REL_OFFSET r11,R11
- CFI_REL_OFFSET r10,R10
- CFI_REL_OFFSET r9,R9
- CFI_REL_OFFSET r8,R8
- CFI_REL_OFFSET rax,RAX
- CFI_REL_OFFSET rcx,RCX
- CFI_REL_OFFSET rdx,RDX
- CFI_REL_OFFSET rsi,RSI
- CFI_REL_OFFSET rdi,RDI
- CFI_REL_OFFSET rip,RIP
- /*CFI_REL_OFFSET cs,CS*/
- /*CFI_REL_OFFSET rflags,EFLAGS*/
- CFI_REL_OFFSET rsp,RSP
- /*CFI_REL_OFFSET ss,SS*/
.endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+ .macro INTR_FRAME start=1 offset=0
+ EMPTY_FRAME \start, SS+8+\offset-RIP
+ /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
+ CFI_REL_OFFSET rsp, RSP+\offset-RIP
+ /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
+ /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
+ CFI_REL_OFFSET rip, RIP+\offset-RIP
+ .endm
+
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ */
+ .macro XCPT_FRAME start=1 offset=0
+ INTR_FRAME \start, RIP+\offset-ORIG_RAX
+ /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+ .endm
+
+/*
+ * frame that enables calling into C.
+ */
+ .macro PARTIAL_FRAME start=1 offset=0
+ XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
+ CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
+ CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
+ CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
+ CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
+ CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
+ CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+ .endm
+
+/*
+ * frame that enables passing a complete pt_regs to a C function.
+ */
+ .macro DEFAULT_FRAME start=1 offset=0
+ PARTIAL_FRAME \start, R11+\offset-R15
+ CFI_REL_OFFSET rbx, RBX+\offset
+ CFI_REL_OFFSET rbp, RBP+\offset
+ CFI_REL_OFFSET r12, R12+\offset
+ CFI_REL_OFFSET r13, R13+\offset
+ CFI_REL_OFFSET r14, R14+\offset
+ CFI_REL_OFFSET r15, R15+\offset
+ .endm
+
+/* save partial stack frame */
+ENTRY(save_args)
+ XCPT_FRAME
+ cld
+ movq_cfi rdi, RDI+16-ARGOFFSET
+ movq_cfi rsi, RSI+16-ARGOFFSET
+ movq_cfi rdx, RDX+16-ARGOFFSET
+ movq_cfi rcx, RCX+16-ARGOFFSET
+ movq_cfi rax, RAX+16-ARGOFFSET
+ movq_cfi r8, R8+16-ARGOFFSET
+ movq_cfi r9, R9+16-ARGOFFSET
+ movq_cfi r10, R10+16-ARGOFFSET
+ movq_cfi r11, R11+16-ARGOFFSET
+
+ leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
+ movq_cfi rbp, 8 /* push %rbp */
+ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
+ testl $3, CS(%rdi)
+ je 1f
+ SWAPGS
+ /*
+ * irqcount is used to check if a CPU is already on an interrupt stack
+ * or not. While this is essentially redundant with preempt_count it is
+ * a little cheaper to use a separate counter in the PDA (short of
+ * moving irq_enter into assembly, which would be too much work)
+ */
+1: incl %gs:pda_irqcount
+ jne 2f
+ popq_cfi %rax /* move return address... */
+ mov %gs:pda_irqstackptr,%rsp
+ EMPTY_FRAME 0
+ pushq_cfi %rax /* ... to the new stack */
+ /*
+ * We entered an interrupt context - irqs are off:
+ */
+2: TRACE_IRQS_OFF
+ ret
+ CFI_ENDPROC
+END(save_args)
+
+ENTRY(save_rest)
+ PARTIAL_FRAME 1 REST_SKIP+8
+ movq 5*8+16(%rsp), %r11 /* save return address */
+ movq_cfi rbx, RBX+16
+ movq_cfi rbp, RBP+16
+ movq_cfi r12, R12+16
+ movq_cfi r13, R13+16
+ movq_cfi r14, R14+16
+ movq_cfi r15, R15+16
+ movq %r11, 8(%rsp) /* return address */
+ FIXUP_TOP_OF_STACK %r11, 16
+ ret
+ CFI_ENDPROC
+END(save_rest)
+
+/* save complete stack frame */
+ENTRY(save_paranoid)
+ XCPT_FRAME 1 RDI+8
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ movl $1,%ebx
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ testl %edx,%edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+1: ret
+ CFI_ENDPROC
+END(save_paranoid)
+
/*
- * A newly forked process directly context switches into this.
- */
-/* rdi: prev */
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
ENTRY(ret_from_fork)
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
+
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
- popf # reset kernel eflags
+ popf # reset kernel eflags
CFI_ADJUST_CFA_OFFSET -8
- call schedule_tail
+
+ call schedule_tail # rdi: 'prev' task parameter
+
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
- jnz rff_trace
-rff_action:
+
+ CFI_REMEMBER_STATE
RESTORE_REST
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
+
+ testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
- testl $_TIF_IA32,TI_flags(%rcx)
+
+ testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
- jmp ret_from_sys_call
-rff_trace:
- movq %rsp,%rdi
- call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
- jmp rff_action
+
+ RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
+ jmp ret_from_sys_call # go to the SYSRET fastpath
+
+ CFI_RESTORE_STATE
CFI_ENDPROC
END(ret_from_fork)
@@ -278,20 +399,20 @@ END(ret_from_fork)
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
*/
-
+
/*
- * Register setup:
+ * Register setup:
* rax system call number
* rdi arg0
- * rcx return address for syscall/sysret, C arg3
+ * rcx return address for syscall/sysret, C arg3
* rsi arg1
- * rdx arg2
+ * rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
- *
+ * r12-r15,rbp,rbx saved by C code, not touched.
+ *
* Interrupts are off on entry.
* Only called from user space.
*
@@ -301,7 +422,7 @@ END(ret_from_fork)
* When user can change the frames always force IRET. That is because
* it deals with uncanonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
- */
+ */
ENTRY(system_call)
CFI_STARTPROC simple
@@ -317,7 +438,7 @@ ENTRY(system_call)
*/
ENTRY(system_call_after_swapgs)
- movq %rsp,%gs:pda_oldrsp
+ movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
@@ -325,7 +446,7 @@ ENTRY(system_call_after_swapgs)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
@@ -339,19 +460,19 @@ system_call_fastpath:
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
- */
+ * Has incomplete stack frame and undefined top of stack.
+ */
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
-sysret_check:
+sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags(%rcx),%edx
andl %edi,%edx
- jnz sysret_careful
+ jnz sysret_careful
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
@@ -366,7 +487,7 @@ sysret_check:
CFI_RESTORE_STATE
/* Handle reschedules */
- /* edx: work, edi: workmask */
+ /* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
@@ -379,7 +500,7 @@ sysret_careful:
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
- /* Handle a signal */
+ /* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
@@ -388,17 +509,20 @@ sysret_signal:
jc sysret_audit
#endif
/* edx: work flags (arg3) */
- leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
- call ptregscall_common
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %r11
+ call do_notify_resume
+ RESTORE_TOP_OF_STACK %r11
+ RESTORE_REST
movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
-
+
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
@@ -437,7 +561,7 @@ sysret_audit:
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */
-tracesys:
+tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jz auditsys
@@ -460,8 +584,8 @@ tracesys:
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
-
-/*
+
+/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
@@ -505,18 +629,18 @@ int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
- /* Check for syscall exit trace */
+ /* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
- leaq 8(%rsp),%rdi # &ptregs -> arg1
+ leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
-
+
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
@@ -531,22 +655,24 @@ int_restore_rest:
jmp int_with_check
CFI_ENDPROC
END(system_call)
-
-/*
+
+/*
* Certain special system calls that need to save a complete full stack frame.
- */
-
+ */
.macro PTREGSCALL label,func,arg
- .globl \label
-\label:
- leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
- jmp ptregscall_common
+ENTRY(\label)
+ PARTIAL_FRAME 1 8 /* offset 8: return address */
+ subq $REST_SKIP, %rsp
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ call save_rest
+ DEFAULT_FRAME 0 8 /* offset 8: return address */
+ leaq 8(%rsp), \arg /* pt_regs pointer */
+ call \func
+ jmp ptregscall_common
+ CFI_ENDPROC
END(\label)
.endm
- CFI_STARTPROC
-
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -554,25 +680,18 @@ END(\label)
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
- FIXUP_TOP_OF_STACK %r11
- call *%rax
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
+ DEFAULT_FRAME 1 8 /* offset 8: return address */
+ RESTORE_TOP_OF_STACK %r11, 8
+ movq_cfi_restore R15+8, r15
+ movq_cfi_restore R14+8, r14
+ movq_cfi_restore R13+8, r13
+ movq_cfi_restore R12+8, r12
+ movq_cfi_restore RBP+8, rbp
+ movq_cfi_restore RBX+8, rbx
+ ret $REST_SKIP /* pop extended registers */
CFI_ENDPROC
END(ptregscall_common)
-
+
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
@@ -588,11 +707,11 @@ ENTRY(stub_execve)
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
-
+
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
- */
+ */
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
@@ -608,70 +727,70 @@ ENTRY(stub_rt_sigreturn)
END(stub_rt_sigreturn)
/*
- * initial frame state for interrupts and exceptions
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
*/
- .macro _frame ref
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-\ref
- /*CFI_REL_OFFSET ss,SS-\ref*/
- CFI_REL_OFFSET rsp,RSP-\ref
- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
- /*CFI_REL_OFFSET cs,CS-\ref*/
- CFI_REL_OFFSET rip,RIP-\ref
- .endm
+ .section .init.rodata,"a"
+ENTRY(interrupt)
+ .text
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+ENTRY(irq_entries_start)
+ INTR_FRAME
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector <> FIRST_EXTERNAL_VECTOR
+ CFI_ADJUST_CFA_OFFSET -8
+ .endif
+1: pushq $(~vector+0x80) /* Note: always in signed byte range */
+ CFI_ADJUST_CFA_OFFSET 8
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+ jmp 2f
+ .endif
+ .previous
+ .quad 1b
+ .text
+vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
+.endr
+ CFI_ENDPROC
+END(irq_entries_start)
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME _frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
- vector already pushed) */
-#define XCPT_FRAME _frame ORIG_RAX
+.previous
+END(interrupt)
+.previous
-/*
+/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee clobbered registers in fast path.
- *
- * Entry runs with interrupts off.
- */
+ *
+ * Entry runs with interrupts off.
+ */
-/* 0(%rsp): interrupt number */
+/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- cld
- SAVE_ARGS
- leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
- pushq %rbp
- /*
- * Save rbp twice: One is for marking the stack frame, as usual, and the
- * other, to fill pt_regs properly. This is because bx comes right
- * before the last saved register in that structure, and not bp. If the
- * base pointer were in the place bx is today, this would not be needed.
- */
- movq %rbp, -8(%rsp)
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbp, 0
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- testl $3,CS(%rdi)
- je 1f
- SWAPGS
- /* irqcount is used to check if a CPU is already on an interrupt
- stack or not. While this is essentially redundant with preempt_count
- it is a little cheaper to use a separate counter in the PDA
- (short of moving irq_enter into assembly, which would be too
- much work) */
-1: incl %gs:pda_irqcount
- cmoveq %gs:pda_irqstackptr,%rsp
- push %rbp # backlink for old unwinder
- /*
- * We entered an interrupt context - irqs are off:
- */
- TRACE_IRQS_OFF
+ subq $10*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 10*8
+ call save_args
+ PARTIAL_FRAME 0
call \func
.endm
-ENTRY(common_interrupt)
+ /*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_interrupt.
+ */
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
XCPT_FRAME
+ addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
@@ -685,12 +804,12 @@ exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
-
+
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
- */
+ */
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
@@ -763,20 +882,20 @@ retint_careful:
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
- popq %rdi
+ popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
-
+
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
- movq $-1,ORIG_RAX(%rsp)
+ movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
@@ -798,324 +917,211 @@ ENTRY(retint_kernel)
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
-#endif
+#endif
CFI_ENDPROC
END(common_interrupt)
-
+
/*
* APIC interrupts.
- */
- .macro apicinterrupt num,func
+ */
+.macro apicinterrupt num sym do_sym
+ENTRY(\sym)
INTR_FRAME
pushq $~(\num)
CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
+ interrupt \do_sym
jmp ret_from_intr
CFI_ENDPROC
- .endm
-
-ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
-
-ENTRY(threshold_interrupt)
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
-
-#ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
- .macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
- .endm
+END(\sym)
+.endm
- INVALIDATE_ENTRY 0
- INVALIDATE_ENTRY 1
- INVALIDATE_ENTRY 2
- INVALIDATE_ENTRY 3
- INVALIDATE_ENTRY 4
- INVALIDATE_ENTRY 5
- INVALIDATE_ENTRY 6
- INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-ENTRY(call_function_single_interrupt)
- apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
-END(call_function_single_interrupt)
-ENTRY(irq_move_cleanup_interrupt)
- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
-END(irq_move_cleanup_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif
-ENTRY(apic_timer_interrupt)
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
+apicinterrupt UV_BAU_MESSAGE \
+ uv_bau_message_intr1 uv_bau_message_interrupt
+apicinterrupt LOCAL_TIMER_VECTOR \
+ apic_timer_interrupt smp_apic_timer_interrupt
+
+#ifdef CONFIG_SMP
+apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
+ invalidate_interrupt0 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
+ invalidate_interrupt1 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
+ invalidate_interrupt2 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
+ invalidate_interrupt3 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
+ invalidate_interrupt4 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
+ invalidate_interrupt5 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
+ invalidate_interrupt6 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
+ invalidate_interrupt7 smp_invalidate_interrupt
+#endif
-ENTRY(uv_bau_message_intr1)
- apicinterrupt 220,uv_bau_message_interrupt
-END(uv_bau_message_intr1)
+apicinterrupt THRESHOLD_APIC_VECTOR \
+ threshold_interrupt mce_threshold_interrupt
+apicinterrupt THERMAL_APIC_VECTOR \
+ thermal_interrupt smp_thermal_interrupt
+
+#ifdef CONFIG_SMP
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
+ call_function_single_interrupt smp_call_function_single_interrupt
+apicinterrupt CALL_FUNCTION_VECTOR \
+ call_function_interrupt smp_call_function_interrupt
+apicinterrupt RESCHEDULE_VECTOR \
+ reschedule_interrupt smp_reschedule_interrupt
+#endif
-ENTRY(error_interrupt)
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
+apicinterrupt ERROR_APIC_VECTOR \
+ error_interrupt smp_error_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR \
+ spurious_interrupt smp_spurious_interrupt
-ENTRY(spurious_interrupt)
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
-
/*
* Exception entry points.
- */
- .macro zeroentry sym
+ */
+.macro zeroentry sym do_sym
+ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0 /* push error code/oldrax */
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+END(\sym)
+.endm
- .macro errorentry sym
- XCPT_FRAME
+.macro paranoidzeroentry sym do_sym
+ENTRY(\sym)
+ INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq %rax
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ subq $15*8, %rsp
+ call save_paranoid
+ TRACE_IRQS_OFF
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+END(\sym)
+.endm
- /* error code is on the stack already */
- /* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0, irqtrace=1
- SAVE_ALL
- cld
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f
- SWAPGS
- xorl %ebx,%ebx
-1:
- .if \ist
- movq %gs:pda_data_offset, %rbp
- .endif
- .if \irqtrace
- TRACE_IRQS_OFF
- .endif
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi
- movq $-1,ORIG_RAX(%rsp)
- .if \ist
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- call \sym
- .if \ist
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \irqtrace
+.macro paranoidzeroentry_ist sym do_sym ist
+ENTRY(\sym)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
+ CFI_ADJUST_CFA_OFFSET 8
+ subq $15*8, %rsp
+ call save_paranoid
TRACE_IRQS_OFF
- .endif
- .endm
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ movq %gs:pda_data_offset, %rbp
+ subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ call \do_sym
+ addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ jmp paranoid_exit /* %ebx: no swapgs flag */
+ CFI_ENDPROC
+END(\sym)
+.endm
- /*
- * "Paranoid" exit path from exception stack.
- * Paranoid because this is used by NMIs and cannot take
- * any kernel state for granted.
- * We don't do kernel preemption checks here, because only
- * NMI should be common and it does not enable IRQs and
- * cannot get reschedule ticks.
- *
- * "trace" is 0 for the NMI handler only, because irq-tracing
- * is fundamentally NMI-unsafe. (we cannot change the soft and
- * hard flags at once, atomically)
- */
- .macro paranoidexit trace=1
- /* ebx: no swapgs flag */
-paranoid_exit\trace:
- testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore\trace
- testl $3,CS(%rsp)
- jnz paranoid_userspace\trace
-paranoid_swapgs\trace:
- .if \trace
- TRACE_IRQS_IRETQ 0
- .endif
- SWAPGS_UNSAFE_STACK
-paranoid_restore\trace:
- RESTORE_ALL 8
- jmp irq_return
-paranoid_userspace\trace:
- GET_THREAD_INFO(%rcx)
- movl TI_flags(%rcx),%ebx
- andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs\trace
- movq %rsp,%rdi /* &pt_regs */
- call sync_regs
- movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule\trace
- movl %ebx,%edx /* arg3: thread flags */
- .if \trace
- TRACE_IRQS_ON
- .endif
- ENABLE_INTERRUPTS(CLBR_NONE)
- xorl %esi,%esi /* arg2: oldset */
- movq %rsp,%rdi /* arg1: &pt_regs */
- call do_notify_resume
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
-paranoid_schedule\trace:
- .if \trace
- TRACE_IRQS_ON
- .endif
- ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
- DISABLE_INTERRUPTS(CLBR_ANY)
- .if \trace
- TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
+.macro errorentry sym do_sym
+ENTRY(\sym)
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+END(\sym)
+.endm
-/*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
-KPROBE_ENTRY(error_entry)
- _frame RDI
- CFI_REL_OFFSET rax,0
- /* rdi slot contains rax, oldrax contains error code */
- cld
- subq $14*8,%rsp
- CFI_ADJUST_CFA_OFFSET (14*8)
- movq %rsi,13*8(%rsp)
- CFI_REL_OFFSET rsi,RSI
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
- CFI_REGISTER rax,rsi
- movq %rdx,12*8(%rsp)
- CFI_REL_OFFSET rdx,RDX
- movq %rcx,11*8(%rsp)
- CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
- CFI_REL_OFFSET rax,RAX
- movq %r8, 9*8(%rsp)
- CFI_REL_OFFSET r8,R8
- movq %r9, 8*8(%rsp)
- CFI_REL_OFFSET r9,R9
- movq %r10,7*8(%rsp)
- CFI_REL_OFFSET r10,R10
- movq %r11,6*8(%rsp)
- CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,R15
- xorl %ebx,%ebx
- testl $3,CS(%rsp)
- je error_kernelspace
-error_swapgs:
- SWAPGS
-error_sti:
- TRACE_IRQS_OFF
- movq %rdi,RDI(%rsp)
- CFI_REL_OFFSET rdi,RDI
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp)
- call *%rax
- /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
- movl %ebx,%eax
- RESTORE_REST
- DISABLE_INTERRUPTS(CLBR_NONE)
+ /* error code is on the stack already */
+.macro paranoiderrorentry sym do_sym
+ENTRY(\sym)
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- testl %eax,%eax
- jne retint_kernel
- LOCKDEP_SYS_EXIT_IRQ
- movl TI_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
- jmp retint_swapgs
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
+END(\sym)
+.endm
-error_kernelspace:
- incl %ebx
- /* There are two places in the kernel that can potentially fault with
- usergs. Handle them here. The exception handlers after
- iret run with kernel gs again, so don't set the user space flag.
- B stepping K8s sometimes report an truncated RIP for IRET
- exceptions returning to compat mode. Check for these here too. */
- leaq irq_return(%rip),%rcx
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- cmpq $gs_change,RIP(%rsp)
- je error_swapgs
- jmp error_sti
-KPROBE_END(error_entry)
-
- /* Reload gs selector with exception handling */
- /* edi: new selector */
+zeroentry divide_error do_divide_error
+zeroentry overflow do_overflow
+zeroentry bounds do_bounds
+zeroentry invalid_op do_invalid_op
+zeroentry device_not_available do_device_not_available
+paranoiderrorentry double_fault do_double_fault
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
+errorentry invalid_TSS do_invalid_TSS
+errorentry segment_not_present do_segment_not_present
+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
+zeroentry coprocessor_error do_coprocessor_error
+errorentry alignment_check do_alignment_check
+zeroentry simd_coprocessor_error do_simd_coprocessor_error
+
+ /* Reload gs selector with exception handling */
+ /* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
- SWAPGS
-gs_change:
- movl %edi,%gs
+ SWAPGS
+gs_change:
+ movl %edi,%gs
2: mfence /* workaround */
SWAPGS
- popf
+ popf
CFI_ADJUST_CFA_OFFSET -8
- ret
+ ret
CFI_ENDPROC
-ENDPROC(native_load_gs_index)
-
- .section __ex_table,"a"
- .align 8
- .quad gs_change,bad_gs
- .previous
- .section .fixup,"ax"
+END(native_load_gs_index)
+
+ .section __ex_table,"a"
+ .align 8
+ .quad gs_change,bad_gs
+ .previous
+ .section .fixup,"ax"
/* running with kernelgs */
-bad_gs:
+bad_gs:
SWAPGS /* switch back to user gs */
xorl %eax,%eax
- movl %eax,%gs
- jmp 2b
- .previous
-
+ movl %eax,%gs
+ jmp 2b
+ .previous
+
/*
* Create a kernel thread.
*
@@ -1138,7 +1144,7 @@ ENTRY(kernel_thread)
xorl %r8d,%r8d
xorl %r9d,%r9d
-
+
# clone now
call do_fork
movq %rax,RAX(%rsp)
@@ -1149,15 +1155,15 @@ ENTRY(kernel_thread)
* so internally to the x86_64 port you can rely on kernel_thread()
* not to reschedule the child before returning, this avoids the need
* of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
+ * [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(kernel_thread)
-
-child_rip:
+END(kernel_thread)
+
+ENTRY(child_rip)
pushq $0 # fake return address
CFI_STARTPROC
/*
@@ -1170,8 +1176,9 @@ child_rip:
# exit
mov %eax, %edi
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
-ENDPROC(child_rip)
+END(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1191,10 +1198,10 @@ ENDPROC(child_rip)
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
- SAVE_ALL
+ SAVE_ALL
movq %rsp,%rcx
call sys_execve
- movq %rax, RAX(%rsp)
+ movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
@@ -1202,129 +1209,7 @@ ENTRY(kernel_execve)
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(kernel_execve)
-
-KPROBE_ENTRY(page_fault)
- errorentry do_page_fault
-KPROBE_END(page_fault)
-
-ENTRY(coprocessor_error)
- zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- zeroentry do_device_not_available
-END(device_not_available)
-
- /* runs on exception stack */
-KPROBE_ENTRY(debug)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_debug, DEBUG_STACK
- paranoidexit
-KPROBE_END(debug)
-
- /* runs on exception stack */
-KPROBE_ENTRY(nmi)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi, 0, 0
-#ifdef CONFIG_TRACE_IRQFLAGS
- paranoidexit 0
-#else
- jmp paranoid_exit1
- CFI_ENDPROC
-#endif
-KPROBE_END(nmi)
-
-KPROBE_ENTRY(int3)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_int3, DEBUG_STACK
- jmp paranoid_exit1
- CFI_ENDPROC
-KPROBE_END(int3)
-
-ENTRY(overflow)
- zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
- zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
- zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
- /* runs on exception stack */
-ENTRY(double_fault)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- paranoidentry do_double_fault
- jmp paranoid_exit1
- CFI_ENDPROC
-END(double_fault)
-
-ENTRY(invalid_TSS)
- errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- errorentry do_segment_not_present
-END(segment_not_present)
-
- /* runs on exception stack */
-ENTRY(stack_segment)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- paranoidentry do_stack_segment
- jmp paranoid_exit1
- CFI_ENDPROC
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
- errorentry do_general_protection
-KPROBE_END(general_protection)
-
-ENTRY(alignment_check)
- errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
- zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
- zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
- /* runs on exception stack */
-ENTRY(machine_check)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_machine_check
- jmp paranoid_exit1
- CFI_ENDPROC
-END(machine_check)
-#endif
+END(kernel_execve)
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
@@ -1344,40 +1229,33 @@ ENTRY(call_softirq)
decl %gs:pda_irqcount
ret
CFI_ENDPROC
-ENDPROC(call_softirq)
-
-KPROBE_ENTRY(ignore_sysret)
- CFI_STARTPROC
- mov $-ENOSYS,%eax
- sysret
- CFI_ENDPROC
-ENDPROC(ignore_sysret)
+END(call_softirq)
#ifdef CONFIG_XEN
-ENTRY(xen_hypervisor_callback)
- zeroentry xen_do_hypervisor_callback
-END(xen_hypervisor_callback)
+zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
/*
-# A note on the "critical region" in our callback handler.
-# We want to avoid stacking callback handlers due to events occurring
-# during handling of the last event. To do this, we keep events disabled
-# until we've done all processing. HOWEVER, we must enable events before
-# popping the stack frame (can't be done atomically) and so it would still
-# be possible to get enough handler activations to overflow the stack.
-# Although unlikely, bugs of that kind are hard to track down, so we'd
-# like to avoid the possibility.
-# So, on entry to the handler we detect whether we interrupted an
-# existing activation in its critical region -- if so, we pop the current
-# activation and restart the handler using the previous one.
-*/
+ * A note on the "critical region" in our callback handler.
+ * We want to avoid stacking callback handlers due to events occurring
+ * during handling of the last event. To do this, we keep events disabled
+ * until we've done all processing. HOWEVER, we must enable events before
+ * popping the stack frame (can't be done atomically) and so it would still
+ * be possible to get enough handler activations to overflow the stack.
+ * Although unlikely, bugs of that kind are hard to track down, so we'd
+ * like to avoid the possibility.
+ * So, on entry to the handler we detect whether we interrupted an
+ * existing activation in its critical region -- if so, we pop the current
+ * activation and restart the handler using the previous one.
+ */
ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
CFI_STARTPROC
-/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
- see the correct pointer to the pt_regs */
+/*
+ * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
+ * see the correct pointer to the pt_regs
+ */
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
@@ -1392,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
END(do_hypervisor_callback)
/*
-# Hypervisor uses this for application faults while it executes.
-# We get here for two reasons:
-# 1. Fault while reloading DS, ES, FS or GS
-# 2. Fault while executing IRET
-# Category 1 we do not need to fix up as Xen has already reloaded all segment
-# registers that could be reloaded and zeroed the others.
-# Category 2 we fix up by killing the current process. We cannot use the
-# normal Linux return path in this case because if we use the IRET hypercall
-# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
-# We distinguish between categories by comparing each saved segment register
-# with its current contents: any discrepancy means we in category 1.
-*/
+ * Hypervisor uses this for application faults while it executes.
+ * We get here for two reasons:
+ * 1. Fault while reloading DS, ES, FS or GS
+ * 2. Fault while executing IRET
+ * Category 1 we do not need to fix up as Xen has already reloaded all segment
+ * registers that could be reloaded and zeroed the others.
+ * Category 2 we fix up by killing the current process. We cannot use the
+ * normal Linux return path in this case because if we use the IRET hypercall
+ * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+ * We distinguish between categories by comparing each saved segment register
+ * with its current contents: any discrepancy means we in category 1.
+ */
ENTRY(xen_failsafe_callback)
- framesz = (RIP-0x30) /* workaround buggy gas */
- _frame framesz
- CFI_REL_OFFSET rcx, 0
- CFI_REL_OFFSET r11, 8
+ INTR_FRAME 1 (6*8)
+ /*CFI_REL_OFFSET gs,GS*/
+ /*CFI_REL_OFFSET fs,FS*/
+ /*CFI_REL_OFFSET es,ES*/
+ /*CFI_REL_OFFSET ds,DS*/
+ CFI_REL_OFFSET r11,8
+ CFI_REL_OFFSET rcx,0
movw %ds,%cx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
@@ -1429,12 +1310,9 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rcx
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0 /* RIP */
+ pushq_cfi %r11
+ pushq_cfi %rcx
jmp general_protection
CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1444,11 +1322,223 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0
SAVE_ALL
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
#endif /* CONFIG_XEN */
+
+/*
+ * Some functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+paranoiderrorentry stack_segment do_stack_segment
+errorentry general_protection do_general_protection
+errorentry page_fault do_page_fault
+#ifdef CONFIG_X86_MCE
+paranoidzeroentry machine_check do_machine_check
+#endif
+
+ /*
+ * "Paranoid" exit path from exception stack.
+ * Paranoid because this is used by NMIs and cannot take
+ * any kernel state for granted.
+ * We don't do kernel preemption checks here, because only
+ * NMI should be common and it does not enable IRQs and
+ * cannot get reschedule ticks.
+ *
+ * "trace" is 0 for the NMI handler only, because irq-tracing
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
+ * hard flags at once, atomically)
+ */
+
+ /* ebx: no swapgs flag */
+ENTRY(paranoid_exit)
+ INTR_FRAME
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz paranoid_restore
+ testl $3,CS(%rsp)
+ jnz paranoid_userspace
+paranoid_swapgs:
+ TRACE_IRQS_IRETQ 0
+ SWAPGS_UNSAFE_STACK
+paranoid_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+paranoid_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz paranoid_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz paranoid_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ jmp paranoid_userspace
+paranoid_schedule:
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ jmp paranoid_userspace
+ CFI_ENDPROC
+END(paranoid_exit)
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
+ */
+ENTRY(error_entry)
+ XCPT_FRAME
+ CFI_ADJUST_CFA_OFFSET 15*8
+ /* oldrax contains error code */
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+error_swapgs:
+ SWAPGS
+error_sti:
+ TRACE_IRQS_OFF
+ ret
+ CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report an truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+ incl %ebx
+ leaq irq_return(%rip),%rcx
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ movl %ecx,%ecx /* zero extend */
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ cmpq $gs_change,RIP+8(%rsp)
+ je error_swapgs
+ jmp error_sti
+END(error_entry)
+
+
+/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+ENTRY(error_exit)
+ DEFAULT_FRAME
+ movl %ebx,%eax
+ RESTORE_REST
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ GET_THREAD_INFO(%rcx)
+ testl %eax,%eax
+ jne retint_kernel
+ LOCKDEP_SYS_EXIT_IRQ
+ movl TI_flags(%rcx),%edx
+ movl $_TIF_WORK_MASK,%edi
+ andl %edi,%edx
+ jnz retint_careful
+ jmp retint_swapgs
+ CFI_ENDPROC
+END(error_exit)
+
+
+ /* runs on exception stack */
+ENTRY(nmi)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq_cfi $-1
+ subq $15*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ movq %rsp,%rdi
+ movq $-1,%rsi
+ call do_nmi
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* paranoidexit; without TRACE_IRQS_OFF */
+ /* ebx: no swapgs flag */
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz nmi_restore
+ testl $3,CS(%rsp)
+ jnz nmi_userspace
+nmi_swapgs:
+ SWAPGS_UNSAFE_STACK
+nmi_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+nmi_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz nmi_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz nmi_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ jmp nmi_userspace
+nmi_schedule:
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ jmp nmi_userspace
+ CFI_ENDPROC
+#else
+ jmp paranoid_exit
+ CFI_ENDPROC
+#endif
+END(nmi)
+
+ENTRY(ignore_sysret)
+ CFI_STARTPROC
+ mov $-ENOSYS,%eax
+ sysret
+ CFI_ENDPROC
+END(ignore_sysret)
+
+/*
+ * End of kprobes section
+ */
+ .popsection
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef..0aa2c443d60 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -250,31 +250,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
int i = 0;
- acpi_size tbl_size;
- while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
+ while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
struct oem_table *t = (struct oem_table *)header;
oem_addrX = t->OEMTableAddr;
oem_size = t->OEMTableSize;
- early_acpi_os_unmap_memory(header, tbl_size);
*oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
oem_size);
return 0;
}
- early_acpi_os_unmap_memory(header, tbl_size);
}
return -1;
}
void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
- if (!oem_addr)
- return;
-
- __acpi_unmap_table((char *)oem_addr, oem_size);
}
#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d073d981a73..50ea0ac8c9b 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -21,8 +21,7 @@
#include <asm/nops.h>
-/* Long is fine, even if it is only 4 bytes ;-) */
-static unsigned long *ftrace_nop;
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@@ -33,17 +32,17 @@ union ftrace_code_union {
};
-static int notrace ftrace_calc_offset(long ip, long addr)
+static int ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
-notrace unsigned char *ftrace_nop_replace(void)
+unsigned char *ftrace_nop_replace(void)
{
- return (char *)ftrace_nop;
+ return ftrace_nop;
}
-notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@@ -57,7 +56,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
-notrace int
+int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
@@ -66,26 +65,31 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
/*
* Note: Due to modules and __init, code can
* disappear and change, we need to protect against faulting
- * as well as code changing.
+ * as well as code changing. We do this by using the
+ * probe_kernel_* functions.
*
* No real locking needed, this code is run through
* kstop_machine, or before SMP starts.
*/
- if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
- return 1;
+ /* read the text we want to modify */
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
- return 2;
+ return -EINVAL;
- WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
- MCOUNT_INSN_SIZE));
+ /* replace the text with the new text */
+ if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ return -EPERM;
sync_core();
return 0;
}
-notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
unsigned char old[MCOUNT_INSN_SIZE], *new;
@@ -98,13 +102,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
-notrace int ftrace_mcount_set(unsigned long *data)
-{
- /* mcount is initialized as a nop */
- *data = 0;
- return 0;
-}
-
int __init ftrace_dyn_arch_init(void *data)
{
extern const unsigned char ftrace_test_p6nop[];
@@ -127,9 +124,6 @@ int __init ftrace_dyn_arch_init(void *data)
* TODO: check the cpuid to determine the best nop.
*/
asm volatile (
- "jmp ftrace_test_jmp\n"
- /* This code needs to stay around */
- ".section .text, \"ax\"\n"
"ftrace_test_jmp:"
"jmp ftrace_test_p6nop\n"
"nop\n"
@@ -140,8 +134,6 @@ int __init ftrace_dyn_arch_init(void *data)
"jmp 1f\n"
"ftrace_test_nop5:"
".byte 0x66,0x66,0x66,0x66,0x90\n"
- "jmp 1f\n"
- ".previous\n"
"1:"
".section .fixup, \"ax\"\n"
"2: movl $1, %0\n"
@@ -156,15 +148,15 @@ int __init ftrace_dyn_arch_init(void *data)
switch (faulted) {
case 0:
pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
- ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+ memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
break;
case 1:
pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
- ftrace_nop = (unsigned long *)ftrace_test_nop5;
+ memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
break;
case 2:
pr_info("ftrace: converting mcount calls to jmp . + 5\n");
- ftrace_nop = (unsigned long *)ftrace_test_jmp;
+ memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
break;
}
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 680a06557c5..2c7dbdb9827 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -15,7 +15,6 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/sched.h>
-#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/hardirq.h>
#include <asm/smp.h>
@@ -398,16 +397,16 @@ void __init uv_system_init(void)
printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
- uv_blade_info = alloc_bootmem_pages(bytes);
+ uv_blade_info = kmalloc(bytes, GFP_KERNEL);
get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
- uv_node_to_blade = alloc_bootmem_pages(bytes);
+ uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
memset(uv_node_to_blade, 255, bytes);
bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
- uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+ uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
memset(uv_cpu_to_blade, 255, bytes);
blade = 0;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 77017e834cf..067d8de913f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta,
* what we wrote hit the chip before we compare it to the
* counter.
*/
- WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
+ WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
@@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
{
if (request_irq(dev->irq, hpet_interrupt_handler,
- IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+ IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev))
return -1;
disable_irq(dev->irq);
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index dd7ebee446a..43cec6bdda6 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -5,7 +5,7 @@
#include <asm/desc.h>
#include <asm/ftrace.h>
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
/* mcount is defined in assembly */
EXPORT_SYMBOL(mcount);
#endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca..b0f61f0dcd0 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
stts();
}
-void __init init_thread_xstate(void)
+void __cpuinit init_thread_xstate(void)
{
if (!HAVE_HWFP) {
xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index b764d7429c6..9043251210f 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1140,6 +1140,20 @@ static void __clear_irq_vector(int irq)
cfg->vector = 0;
cpus_clear(cfg->domain);
+
+ if (likely(!cfg->move_in_progress))
+ return;
+ cpus_and(mask, cfg->old_domain, cpu_online_map);
+ for_each_cpu_mask_nr(cpu, mask) {
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+ vector++) {
+ if (per_cpu(vector_irq, cpu)[vector] != irq)
+ continue;
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ break;
+ }
+ }
+ cfg->move_in_progress = 0;
}
void __setup_vector_irq(int cpu)
@@ -3594,25 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic)
int __init probe_nr_irqs(void)
{
- int idx;
- int nr = 0;
-#ifndef CONFIG_XEN
- int nr_min = 32;
-#else
- int nr_min = NR_IRQS;
-#endif
-
- for (idx = 0; idx < nr_ioapics; idx++)
- nr += io_apic_get_redir_entries(idx) + 1;
-
- /* double it for hotplug and msi and nmi */
- nr <<= 1;
-
- /* something wrong ? */
- if (nr < nr_min)
- nr = nr_min;
-
- return nr;
+ return NR_IRQS;
}
/* --------------------------------------------------------------------------
@@ -3759,7 +3755,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
+ struct irq_desc *desc;
struct irq_cfg *cfg;
+ cpumask_t mask;
if (skip_ioapic_setup == 1)
return;
@@ -3776,16 +3774,30 @@ void __init setup_ioapic_dest(void)
* cpu is online.
*/
cfg = irq_cfg(irq);
- if (!cfg->vector)
+ if (!cfg->vector) {
setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+ continue;
+
+ }
+
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ desc = irq_to_desc(irq);
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = desc->affinity;
+ else
+ mask = TARGET_CPUS;
+
#ifdef CONFIG_INTR_REMAP
- else if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq(irq, mask);
else
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+ set_ioapic_affinity_irq(irq, mask);
}
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a..1d3d0e71b04 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,7 +18,6 @@
#include <asm/idle.h>
#include <asm/smp.h>
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
*
@@ -28,19 +27,18 @@
*/
static inline void stack_overflow_check(struct pt_regs *regs)
{
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
u64 curbase = (u64)task_stack_page(current);
- static unsigned long warned = -60*HZ;
-
- if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
- regs->sp < curbase + sizeof(struct thread_info) + 128 &&
- time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
- current->comm, curbase, regs->sp);
- show_stack(NULL,NULL);
- warned = jiffies;
- }
-}
+
+ WARN_ONCE(regs->sp >= curbase &&
+ regs->sp <= curbase + THREAD_SIZE &&
+ regs->sp < curbase + sizeof(struct thread_info) +
+ sizeof(struct pt_regs) + 128,
+
+ "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+ current->comm, curbase, regs->sp);
#endif
+}
/*
* do_IRQ handles all normal device IRQ's (the special
@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
stack_overflow_check(regs);
-#endif
desc = irq_to_desc(irq);
if (likely(desc))
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e8..607db63044a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
/* SYSCALL_VECTOR was reserved in trap_init. */
if (i != SYSCALL_VECTOR)
- set_intr_gate(i, interrupt[i]);
+ set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff023539128..8670b3ce626 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -24,41 +24,6 @@
#include <asm/i8259.h>
/*
- * Common place to define all x86 IRQ vectors
- *
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
- *
- * These macros create the low-level assembly IRQ routines that save
- * register context and call do_IRQ(). do_IRQ() then does all the
- * operations that are needed to keep the AT (or SMP IOAPIC)
- * interrupt-controller happy.
- */
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-/*
- * SMP has a few special interrupts for IPI messages
- */
-
-#define BUILD_IRQ(nr) \
- asmlinkage void IRQ_NAME(nr); \
- asm("\n.text\n.p2align\n" \
- "IRQ" #nr "_interrupt:\n\t" \
- "push $~(" #nr ") ; " \
- "jmp common_interrupt\n" \
- ".previous");
-
-#define BI(x,y) \
- BUILD_IRQ(x##y)
-
-#define BUILD_16_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-
-/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x30-0x3f)
*/
@@ -73,37 +38,6 @@
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
- BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
-BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
-BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
-
-#undef BUILD_16_IRQS
-#undef BI
-
-
-#define IRQ(x,y) \
- IRQ##x##y##_interrupt
-
-#define IRQLIST_16(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
-/* for the irq vectors */
-static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
- IRQLIST_16(0x2), IRQLIST_16(0x3),
- IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
-};
-
-#undef IRQ
-#undef IRQLIST_16
-
-
-
/*
* IRQ2 is cascade interrupt to second interrupt controller
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c
index 304d8bad655..cbc4332a77b 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/k8.c
@@ -18,7 +18,6 @@ static u32 *flush_words;
struct pci_device_id k8_nb_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
{}
};
EXPORT_SYMBOL(k8_nb_ids);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 774ac499156..e169ae9b6a6 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static void kvm_setup_secondary_clock(void)
+static void __cpuinit kvm_setup_secondary_clock(void)
{
/*
* Now that the first cpu already had this clocksource initialized,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 0732adba05c..7a385746509 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -162,7 +162,10 @@ void machine_kexec(struct kimage *image)
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] = __pa(kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
- page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
+
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+ << PAGE_SHIFT);
/* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 7a1f8eeac2c..5f8e5d75a25 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -39,7 +39,7 @@
#include <asm/microcode.h>
MODULE_DESCRIPTION("AMD Microcode Update Driver");
-MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
+MODULE_AUTHOR("Peter Oruba");
MODULE_LICENSE("GPL v2");
#define UCODE_MAGIC 0x00414d44
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 936d8d55f23..82fb2809ce3 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -480,8 +480,8 @@ static int __init microcode_init(void)
printk(KERN_INFO
"Microcode Update Driver: v" MICROCODE_VERSION
- " <tigran@aivazian.fsnet.co.uk>"
- " <peter.oruba@amd.com>\n");
+ " <tigran@aivazian.fsnet.co.uk>,"
+ " Peter Oruba\n");
return 0;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1dba0..0f4c1fd5a1f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early)
printk(KERN_INFO "Using ACPI for processor (LAPIC) "
"configuration information\n");
+ if (!mpf)
+ return;
+
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f3..d28bbdc35e4 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtol(p, &endp, 0);
+ bridge = simple_strtoul(p, &endp, 0);
if (p == endp)
break;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1972266e8ba..19262482021 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -9,6 +9,8 @@
#include <asm/calgary.h>
#include <asm/amd_iommu.h>
+static int forbid_dac __read_mostly;
+
struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
@@ -291,3 +293,17 @@ void pci_iommu_shutdown(void)
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
+
+#ifdef CONFIG_PCI
+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+
+static __devinit void via_no_dac(struct pci_dev *dev)
+{
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+ printk(KERN_INFO "PCI: VIA PCI bridge detected."
+ "Disabling DAC.\n");
+ forbid_dac = 1;
+ }
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+#endif
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index e3f75bbcede..ba7ad83e20a 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
iommu_area_free(iommu_gart_bitmap, offset, size);
+ if (offset >= next_bit)
+ next_bit = offset + size;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
@@ -744,7 +746,7 @@ void __init gart_iommu_init(void)
long i;
if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
- printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
+ printk(KERN_INFO "PCI-GART: No AMD GART found.\n");
return;
}
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index c4ce0332759..3c539d111ab 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,9 +18,21 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
}
+static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags)
+{
+ void *vaddr;
+
+ vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags);
+ if (vaddr)
+ return vaddr;
+
+ return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
+}
+
struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
- .alloc_coherent = swiotlb_alloc_coherent,
+ .alloc_coherent = x86_swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
.map_single = swiotlb_map_single_phys,
.unmap_single = swiotlb_unmap_single,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f4c93f1cfc1..cc5a2545dd4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off);
static const struct desc_ptr no_idt = {};
static int reboot_mode;
-/*
- * Keyboard reset and triple fault may result in INIT, not RESET, which
- * doesn't work when we're in vmx root mode. Try ACPI first.
- */
-enum reboot_type reboot_type = BOOT_ACPI;
+enum reboot_type reboot_type = BOOT_KBD;
int reboot_force;
#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
@@ -173,6 +169,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
},
},
+ { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
+ .callback = set_bios_reboot,
+ .ident = "Dell OptiPlex 330",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
+ DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
+ },
+ },
{ /* Handle problems with rebooting on Dell 2400's */
.callback = set_bios_reboot,
.ident = "Dell PowerEdge 2400",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd..9d5674f7b6c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -764,7 +764,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
.callback = dmi_low_memory_corruption,
.ident = "Phoenix BIOS",
.matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
},
},
#endif
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index cb19d650c21..418a095c579 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
break;
no_ctr_free = (i == 4);
if (no_ctr_free) {
+ WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
+ "cpu_khz value may be incorrect.\n");
i = 3;
rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
wrmsrl(MSR_K7_EVNTSEL3, 0);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index e00534b3353..f4049f3513b 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
flush_mm = mm;
flush_va = va;
cpus_or(flush_cpumask, cpumask, flush_cpumask);
+
+ /*
+ * Make the above memory operations globally visible before
+ * sending the IPI.
+ */
+ smp_mb();
/*
* We have to send the IPI only to
* CPUs affected.
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index dcbf7a1159e..8f919ca6949 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -183,6 +183,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
/*
+ * Make the above memory operations globally visible before
+ * sending the IPI.
+ */
+ smp_mb();
+ /*
* We have to send the IPI only to
* CPUs affected.
*/
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 161bb850fc4..424093b157d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -55,7 +55,7 @@ u64 native_sched_clock(void)
rdtscll(this_offset);
/* return the value in ns */
- return cycles_2_ns(this_offset);
+ return __cycles_2_ns(this_offset);
}
/* We need to define a real function for sched_clock, to override the
@@ -759,7 +759,7 @@ __cpuinit int unsynchronized_tsc(void)
if (!cpu_has_tsc || tsc_unstable)
return 1;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_SMP
if (apic_is_clustered_box())
return 1;
#endif
@@ -813,10 +813,6 @@ void __init tsc_init(void)
cpu_khz = calibrate_cpu();
#endif
- lpj = ((u64)tsc_khz * 1000);
- do_div(lpj, HZ);
- lpj_fine = lpj;
-
printk("Detected %lu.%03lu MHz processor.\n",
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
@@ -836,6 +832,10 @@ void __init tsc_init(void)
/* now allow native_sched_clock() to use rdtsc */
tsc_disabled = 0;
+ lpj = ((u64)tsc_khz * 1000);
+ do_div(lpj, HZ);
+ lpj_fine = lpj;
+
use_tsc_delay();
/* Check and install the TSC clocksource */
dmi_check_system(bad_tsc_dmi_table);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c4..1c0dfbca87c 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void)
cycles_t start, now, prev, end;
int i;
+ rdtsc_barrier();
start = get_cycles();
+ rdtsc_barrier();
/*
* The measurement runs for 20 msecs:
*/
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void)
*/
__raw_spin_lock(&sync_lock);
prev = last_tsc;
+ rdtsc_barrier();
now = get_cycles();
+ rdtsc_barrier();
last_tsc = now;
__raw_spin_unlock(&sync_lock);
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 7766d36983f..a688f3bfaec 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -78,7 +78,7 @@ static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
static void __init set_vsmp_pv_ops(void)
{
- void *address;
+ void __iomem *address;
unsigned int cap, ctl, cfg;
/* set vSMP magic bits to indicate vSMP capable kernel */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86..ebf2f12900f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
gettimeofday(tv,NULL);
return;
}
+
+ /*
+ * Surround the RDTSC by barriers, to make sure it's not
+ * speculated to outside the seqlock critical section and
+ * does not cause time warps:
+ */
+ rdtsc_barrier();
now = vread();
+ rdtsc_barrier();
+
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index b545f371b5f..695e426aa35 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -12,7 +12,7 @@
#include <asm/desc.h>
#include <asm/ftrace.h>
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
/* mcount is defined in assembly */
EXPORT_SYMBOL(mcount);
#endif
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e82..15c3e699918 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
/*
* Enable and initialize the xsave feature.
*/
-void __init xsave_cntxt_init(void)
+void __ref xsave_cntxt_init(void)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ce3251ce550..b81125f0bde 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
+ # for device assignment:
+ depends on PCI
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 11c6725fb79..59ebd37ad79 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -545,6 +545,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
if (!pit)
return NULL;
+ mutex_lock(&kvm->lock);
+ pit->irq_source_id = kvm_request_irq_source_id(kvm);
+ mutex_unlock(&kvm->lock);
+ if (pit->irq_source_id < 0) {
+ kfree(pit);
+ return NULL;
+ }
+
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
@@ -587,6 +595,7 @@ void kvm_free_pit(struct kvm *kvm)
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
+ kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
kfree(kvm->arch.vpit);
}
@@ -595,8 +604,8 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, 0, 1);
- kvm_set_irq(kvm, 0, 0);
+ kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+ kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->lock);
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index e436d4983aa..4178022b97a 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -44,6 +44,7 @@ struct kvm_pit {
struct kvm_io_device speaker_dev;
struct kvm *kvm;
struct kvm_kpit_state pit_state;
+ int irq_source_id;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 99c239c5c0a..410ddbc1aa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
- rmap_desc_cache, 1);
+ rmap_desc_cache, 4);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
}
rmap_write_protect(vcpu->kvm, sp->gfn);
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
return 1;
}
kvm_mmu_flush_tlb(vcpu);
- kvm_unlink_unsync_page(vcpu->kvm, sp);
return 0;
}
@@ -2634,6 +2634,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->tlb_flush(vcpu);
+ set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
return 1;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9aa674..84eee43bbe7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
+ kvm_mmu_put_page(shadow_page, sptep);
kvm_release_pfn_clean(sw->pfn);
sw->sptep = NULL;
return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2643b430d83..a4018b01e1f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (cpu_has_virtual_nmis()) {
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vmx_nmi_enabled(vcpu)) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vmx_nmi_enabled(vcpu)) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
@@ -3564,7 +3566,8 @@ static int __init vmx_init(void)
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
+ VMX_EPT_IGMT_BIT);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 3e010d21fdd..ec5edc339da 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
+#define VMX_EPT_IGMT_BIT (1ull << 6)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f0677d1eae..f1f8ff2f1fa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
mutex_lock(&kvm->lock);
- kvm_set_irq(kvm, irq_event.irq, irq_event.level);
+ kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event.irq, irq_event.level);
mutex_unlock(&kvm->lock);
r = 0;
}
@@ -4013,6 +4014,9 @@ struct kvm *kvm_arch_create_vm(void)
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+ /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
return kvm;
}
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 48ee4f9435f..50a779264bb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -367,10 +367,9 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
* lazily after a task switch, and Linux uses that gratefully, but wouldn't a
* name like "FPUTRAP bit" be a little less cryptic?
*
- * We store cr0 (and cr3) locally, because the Host never changes it. The
- * Guest sometimes wants to read it and we'd prefer not to bother the Host
- * unnecessarily. */
-static unsigned long current_cr0, current_cr3;
+ * We store cr0 locally because the Host never changes it. The Guest sometimes
+ * wants to read it and we'd prefer not to bother the Host unnecessarily. */
+static unsigned long current_cr0;
static void lguest_write_cr0(unsigned long val)
{
lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0);
@@ -399,17 +398,23 @@ static unsigned long lguest_read_cr2(void)
return lguest_data.cr2;
}
+/* See lguest_set_pte() below. */
+static bool cr3_changed = false;
+
/* cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0. Keep a local copy, and tell the Host when it changes. */
+ * cr0. Keep a local copy, and tell the Host when it changes. The only
+ * difference is that our local copy is in lguest_data because the Host needs
+ * to set it upon our initial hypercall. */
static void lguest_write_cr3(unsigned long cr3)
{
+ lguest_data.pgdir = cr3;
lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
- current_cr3 = cr3;
+ cr3_changed = true;
}
static unsigned long lguest_read_cr3(void)
{
- return current_cr3;
+ return lguest_data.pgdir;
}
/* cr4 is used to enable and disable PGE, but we don't care. */
@@ -498,13 +503,13 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
* to forget all of them. Fortunately, this is very rare.
*
* ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow. So we don't even tell the Host
- * anything changed until we've done the first page table switch. */
+ * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell
+ * the Host anything changed until we've done the first page table switch,
+ * which brings boot back to 0.25 seconds. */
static void lguest_set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
- /* Don't bother with hypercall before initial setup. */
- if (current_cr3)
+ if (cr3_changed)
lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
}
@@ -521,7 +526,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
static void lguest_flush_tlb_single(unsigned long addr)
{
/* Simply set it to zero: if it was not, it will fault back in. */
- lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+ lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
}
/* This is what happens after the Guest has removed a large number of entries.
@@ -581,8 +586,12 @@ static void __init lguest_init_IRQ(void)
for (i = 0; i < LGUEST_IRQS; i++) {
int vector = FIRST_EXTERNAL_VECTOR + i;
+ /* Some systems map "vectors" to interrupts weirdly. Lguest has
+ * a straightforward 1 to 1 mapping, so force that here. */
+ __get_cpu_var(vector_irq)[vector] = i;
if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector, interrupt[vector]);
+ set_intr_gate(vector,
+ interrupt[vector-FIRST_EXTERNAL_VECTOR]);
set_irq_chip_and_handler_name(i, &lguest_irq_controller,
handle_level_irq,
"level");
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 6bbdd633864..a580b9562e7 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -27,7 +27,7 @@ static struct irqaction irq2 = {
void __init intr_init_hook(void)
{
#ifdef CONFIG_SMP
- smp_intr_init();
+ voyager_smp_intr_init();
#endif
setup_irq(2, &irq2);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0f6e8a6523a..52145007bd7 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -7,6 +7,7 @@
* This file provides all the same external entries as smp.c but uses
* the voyager hal to provide the functionality
*/
+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
@@ -90,6 +91,7 @@ static void ack_vic_irq(unsigned int irq);
static void vic_enable_cpi(void);
static void do_boot_cpu(__u8 cpuid);
static void do_quad_bootstrap(void);
+static void initialize_secondary(void);
int hard_smp_processor_id(void);
int safe_smp_processor_id(void);
@@ -344,6 +346,12 @@ static void do_quad_bootstrap(void)
}
}
+void prefill_possible_map(void)
+{
+ /* This is empty on voyager because we need a much
+ * earlier detection which is done in find_smp_config */
+}
+
/* Set up all the basic stuff: read the SMP config and make all the
* SMP information reflect only the boot cpu. All others will be
* brought on-line later. */
@@ -413,6 +421,7 @@ void __init smp_store_cpu_info(int id)
struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
+ c->cpu_index = id;
identify_secondary_cpu(c);
}
@@ -650,6 +659,8 @@ void __init smp_boot_cpus(void)
smp_tune_scheduling();
*/
smp_store_cpu_info(boot_cpu_id);
+ /* setup the jump vector */
+ initial_code = (unsigned long)initialize_secondary;
printk("CPU%d: ", boot_cpu_id);
print_cpu_info(&cpu_data(boot_cpu_id));
@@ -702,7 +713,7 @@ void __init smp_boot_cpus(void)
/* Reload the secondary CPUs task structure (this function does not
* return ) */
-void __init initialize_secondary(void)
+static void __init initialize_secondary(void)
{
#if 0
// AC kernels only
@@ -1248,7 +1259,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
#define QIC_SET_GATE(cpi, vector) \
set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
-void __init smp_intr_init(void)
+void __init voyager_smp_intr_init(void)
{
int i;
@@ -1780,6 +1791,17 @@ void __init smp_setup_processor_id(void)
x86_write_percpu(cpu_number, hard_smp_processor_id());
}
+static void voyager_send_call_func(cpumask_t callmask)
+{
+ __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
+ send_CPI(mask, VIC_CALL_FUNCTION_CPI);
+}
+
+static void voyager_send_call_func_single(int cpu)
+{
+ send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
+}
+
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
.smp_prepare_cpus = voyager_smp_prepare_cpus,
@@ -1789,6 +1811,6 @@ struct smp_ops smp_ops = {
.smp_send_stop = voyager_smp_send_stop,
.smp_send_reschedule = voyager_smp_send_reschedule,
- .send_call_func_ipi = native_send_call_func_ipi,
- .send_call_func_single_ipi = native_send_call_func_single_ipi,
+ .send_call_func_ipi = voyager_send_call_func,
+ .send_call_func_single_ipi = voyager_send_call_func_single,
};
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 59f89b434b4..fea4565ff57 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,7 +1,7 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o
-obj-$(CONFIG_X86_32) += pgtable_32.o
+obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 4ba373c5b8c..be54176e9eb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -233,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- start, len)))
+ (void __user *)start, len)))
goto slow_irqon;
/*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c..3ffed259883 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -102,6 +102,8 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
+
+ return pmd_table;
}
#endif
pud = pud_offset(pgd, 0);
@@ -334,7 +336,6 @@ int devmem_is_allowed(unsigned long pagenr)
return 0;
}
-#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;
@@ -357,6 +358,7 @@ static void __init kmap_init(void)
kmap_prot = PAGE_KERNEL;
}
+#ifdef CONFIG_HIGHMEM
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
unsigned long vaddr;
@@ -436,7 +438,6 @@ static void __init set_highmem_pages_init(void)
#endif /* !CONFIG_NUMA */
#else
-# define kmap_init() do { } while (0)
# define permanent_kmaps_init(pgd_base) do { } while (0)
# define set_highmem_pages_init() do { } while (0)
#endif /* CONFIG_HIGHMEM */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8e461d4941..9db01db6e3c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
* pagetable pages as RO. So assume someone who pre-setup
* these mappings are more intelligent.
*/
- if (pte_val(*pte))
+ if (pte_val(*pte)) {
+ pages++;
continue;
+ }
if (0)
printk(" pte=%p addr=%lx pte=%016lx\n",
@@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
* not differ with respect to page frame and
* attributes.
*/
- if (page_size_mask & (1 << PG_LEVEL_2M))
+ if (page_size_mask & (1 << PG_LEVEL_2M)) {
+ pages++;
continue;
+ }
new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
}
@@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
* not differ with respect to page frame and
* attributes.
*/
- if (page_size_mask & (1 << PG_LEVEL_1G))
+ if (page_size_mask & (1 << PG_LEVEL_1G)) {
+ pages++;
continue;
+ }
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
}
@@ -665,12 +671,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
unsigned long last_map_addr = 0;
unsigned long page_size_mask = 0;
unsigned long start_pfn, end_pfn;
+ unsigned long pos;
struct map_range mr[NR_RANGE_MR];
int nr_range, i;
int use_pse, use_gbpages;
- printk(KERN_INFO "init_memory_mapping\n");
+ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
/*
* Find space for the kernel direct mapping tables.
@@ -704,35 +711,50 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
/* head if not big page alignment ?*/
start_pfn = start >> PAGE_SHIFT;
- end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
+ pos = start_pfn << PAGE_SHIFT;
+ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
<< (PMD_SHIFT - PAGE_SHIFT);
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* big page (2M) range*/
- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
<< (PMD_SHIFT - PAGE_SHIFT);
- end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
+ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
<< (PUD_SHIFT - PAGE_SHIFT);
- if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
- end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
+ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
+ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* big page (1G) range */
- start_pfn = end_pfn;
- end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+ << (PUD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
page_size_mask &
((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* tail is not big page (1G) alignment */
- start_pfn = end_pfn;
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
- page_size_mask & (1<<PG_LEVEL_2M));
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+ << (PMD_SHIFT - PAGE_SHIFT);
+ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+ if (start_pfn < end_pfn) {
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+ page_size_mask & (1<<PG_LEVEL_2M));
+ pos = end_pfn << PAGE_SHIFT;
+ }
/* tail is not big page (2M) alignment */
- start_pfn = end_pfn;
+ start_pfn = pos>>PAGE_SHIFT;
end_pfn = end>>PAGE_SHIFT;
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
@@ -831,12 +853,12 @@ int arch_add_memory(int nid, u64 start, u64 size)
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- last_mapped_pfn = init_memory_mapping(start, start + size-1);
+ last_mapped_pfn = init_memory_mapping(start, start + size);
if (last_mapped_pfn > max_pfn_mapped)
max_pfn_mapped = last_mapped_pfn;
ret = __add_pages(zone, start_pfn, nr_pages);
- WARN_ON(1);
+ WARN_ON_ONCE(ret);
return ret;
}
@@ -878,6 +900,7 @@ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;
+ unsigned long absent_pages;
start_periodic_check_for_corruption();
@@ -893,8 +916,9 @@ void __init mem_init(void)
#else
totalram_pages = free_all_bootmem();
#endif
- reservedpages = max_pfn - totalram_pages -
- absent_pages_in_range(0, max_pfn);
+
+ absent_pages = absent_pages_in_range(0, max_pfn);
+ reservedpages = max_pfn - totalram_pages - absent_pages;
after_bootmem = 1;
codesize = (unsigned long) &_etext - (unsigned long) &_text;
@@ -911,10 +935,11 @@ void __init mem_init(void)
VSYSCALL_END - VSYSCALL_START);
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
- "%ldk reserved, %ldk data, %ldk init)\n",
+ "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
max_pfn << (PAGE_SHIFT-10),
codesize >> 10,
+ absent_pages << (PAGE_SHIFT-10),
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
new file mode 100644
index 00000000000..d0151d8ce45
--- /dev/null
+++ b/arch/x86/mm/iomap_32.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <asm/iomap.h>
+#include <linux/module.h>
+
+/* Map 'pfn' using fixed map 'type' and protections 'prot'
+ */
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+{
+ enum fixed_addresses idx;
+ unsigned long vaddr;
+
+ pagefault_disable();
+
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
+ arch_flush_lazy_mmu_mode();
+
+ return (void*) vaddr;
+}
+EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type)
+{
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+ /*
+ * Force other mappings to Oops if they'll try to access this pte
+ * without first remap it. Keeping stale mappings around is a bad idea
+ * also, in case the page changes cacheability attributes or becomes
+ * a protected page in a hypervisor.
+ */
+ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ kpte_clear_flush(kmap_pte-idx, vaddr);
+
+ arch_flush_lazy_mmu_mode();
+ pagefault_enable();
+}
+EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ae71e11eb3e..d4c4307ff3e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -387,7 +387,7 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
unsigned long size)
{
unsigned long flags;
- void *ret;
+ void __iomem *ret;
int err;
/*
@@ -399,11 +399,11 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
if (err < 0)
return NULL;
- ret = (void *) __ioremap_caller(phys_addr, size, flags,
- __builtin_return_address(0));
+ ret = __ioremap_caller(phys_addr, size, flags,
+ __builtin_return_address(0));
free_memtype(phys_addr, phys_addr + size);
- return (void __iomem *)ret;
+ return ret;
}
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
@@ -622,7 +622,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
__early_set_fixmap(idx, 0, __pgprot(0));
}
-static void *prev_map[FIX_BTMAPS_SLOTS] __initdata;
+static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
static int __init check_early_ioremap_leak(void)
{
@@ -645,7 +645,7 @@ static int __init check_early_ioremap_leak(void)
}
late_initcall(check_early_ioremap_leak);
-static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
+static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
{
unsigned long offset, last_addr;
unsigned int nrpages;
@@ -713,23 +713,23 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size,
if (early_ioremap_debug)
printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
- prev_map[slot] = (void *) (offset + fix_to_virt(idx0));
+ prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
return prev_map[slot];
}
/* Remap an IO device */
-void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
{
return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
}
/* Remap memory */
-void __init *early_memremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
{
return __early_ioremap(phys_addr, size, PAGE_KERNEL);
}
-void __init early_iounmap(void *addr, unsigned long size)
+void __init early_iounmap(void __iomem *addr, unsigned long size)
{
unsigned long virt_addr;
unsigned long offset;
@@ -779,7 +779,7 @@ void __init early_iounmap(void *addr, unsigned long size)
--idx;
--nrpages;
}
- prev_map[slot] = 0;
+ prev_map[slot] = NULL;
}
void __this_fixmap_does_not_exist(void)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f..8518c678d83 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
}
}
+#ifdef CONFIG_HIBERNATION
+/**
+ * resume_map_numa_kva - add KVA mapping to the temporary page tables created
+ * during resume from hibernation
+ * @pgd_base - temporary resume page directory
+ */
+void resume_map_numa_kva(pgd_t *pgd_base)
+{
+ int node;
+
+ for_each_online_node(node) {
+ unsigned long start_va, start_pfn, size, pfn;
+
+ start_va = (unsigned long)node_remap_start_vaddr[node];
+ start_pfn = node_remap_start_pfn[node];
+ size = node_remap_size[node];
+
+ printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+
+ for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+ unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
+ pgd_t *pgd = pgd_base + pgd_index(vaddr);
+ pud_t *pud = pud_offset(pgd, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+
+ set_pmd(pmd, pfn_pmd(start_pfn + pfn,
+ PAGE_KERNEL_LARGE_EXEC));
+
+ printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
+ __FUNCTION__, vaddr, start_pfn + pfn);
+ }
+ }
+}
+#endif
+
static unsigned long calculate_numa_remap_pages(void)
{
int nid;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f1dc1b75d16..e89d24815f2 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -67,18 +67,18 @@ static void split_page_count(int level)
void arch_report_meminfo(struct seq_file *m)
{
- seq_printf(m, "DirectMap4k: %8lu kB\n",
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
direct_pages_count[PG_LEVEL_4K] << 2);
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
- seq_printf(m, "DirectMap2M: %8lu kB\n",
+ seq_printf(m, "DirectMap2M: %8lu kB\n",
direct_pages_count[PG_LEVEL_2M] << 11);
#else
- seq_printf(m, "DirectMap4M: %8lu kB\n",
+ seq_printf(m, "DirectMap4M: %8lu kB\n",
direct_pages_count[PG_LEVEL_2M] << 12);
#endif
#ifdef CONFIG_X86_64
if (direct_gbpages)
- seq_printf(m, "DirectMap1G: %8lu kB\n",
+ seq_printf(m, "DirectMap1G: %8lu kB\n",
direct_pages_count[PG_LEVEL_1G] << 20);
#endif
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 738fd0f2495..eb1bf000d12 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -481,12 +481,16 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
return 1;
}
#else
+/* This check is needed to avoid cache aliasing when PAT is enabled */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
u64 from = ((u64)pfn) << PAGE_SHIFT;
u64 to = from + size;
u64 cursor = from;
+ if (!pat_enabled)
+ return 1;
+
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
printk(KERN_INFO
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 022cd41ea9b..202864ad49a 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type)
*cpu_type = "i386/pii";
break;
case 6 ... 8:
+ case 10 ... 11:
*cpu_type = "i386/piii";
break;
case 9:
+ case 13:
*cpu_type = "i386/p6_mobile";
break;
- case 10 ... 13:
- *cpu_type = "i386/p6";
- break;
case 14:
*cpu_type = "i386/core";
break;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 0620d6d45f7..e9f80c744cf 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -27,8 +27,7 @@ static int num_counters = 2;
static int counter_width = 32;
#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
+#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
@@ -70,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
int i;
if (!reset_value) {
- reset_value = kmalloc(sizeof(unsigned) * num_counters,
+ reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
GFP_ATOMIC);
if (!reset_value)
return;
@@ -124,14 +123,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
static int ppro_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
for (i = 0 ; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CTR_READ(low, high, msrs, i);
- if (CTR_OVERFLOWED(low)) {
+ rdmsrl(msrs->counters[i].addr, val);
+ if (CTR_OVERFLOWED(val)) {
oprofile_add_sample(regs, i);
wrmsrl(msrs->counters[i].addr, -reset_value[i]);
}
@@ -157,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs)
unsigned int low, high;
int i;
+ if (!reset_value)
+ return;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
CTRL_READ(low, high, msrs, i);
@@ -172,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs)
unsigned int low, high;
int i;
+ if (!reset_value)
+ return;
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a809393..2051dc96b8e 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
/*
- * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
- * have 4096 bytes. Even if the device is capable, that doesn't mean we can
- * access it. Maybe we don't have a way to generate extended config space
- * accesses. So check it
+ * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
+ * 4096 bytes configuration space for each function of their processor
+ * configuration space.
*/
-static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
{
dev->cfg_size = pci_cfg_space_size_ext(dev);
}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
/*
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f2b6e3f11bf..81197c62d5b 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/mmzone.h>
/* Defined in hibernate_asm_32.S */
extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
}
}
}
+
+ resume_map_numa_kva(pgd_base);
+
return 0;
}
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 313947940a1..6dcefba7836 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_spinlock.o = -pg
CFLAGS_REMOVE_time.o = -pg
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b61534c7a4c..5e4686d70f6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -863,15 +863,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
if (PagePinned(virt_to_page(mm->pgd))) {
SetPagePinned(page);
+ vm_unmap_aliases();
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
- } else
+ } else {
/* make sure there are no stray mappings of
this page */
kmap_flush_unused();
- vm_unmap_aliases();
+ }
}
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d4d52f5a1cf..636ef4caa52 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -246,11 +246,21 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
{
unsigned long address = (unsigned long)vaddr;
unsigned int level;
- pte_t *pte = lookup_address(address, &level);
- unsigned offset = address & ~PAGE_MASK;
+ pte_t *pte;
+ unsigned offset;
- BUG_ON(pte == NULL);
+ /*
+ * if the PFN is in the linear mapped vaddr range, we can just use
+ * the (quick) virt_to_machine() p2m lookup
+ */
+ if (virt_addr_valid(vaddr))
+ return virt_to_machine(vaddr);
+ /* otherwise we have to do a (slower) full page-table walk */
+
+ pte = lookup_address(address, &level);
+ BUG_ON(pte == NULL);
+ offset = address & ~PAGE_MASK;
return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
}
@@ -410,7 +420,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
xen_mc_batch();
- u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+ u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
u.val = pte_val_ma(pte);
xen_extend_mmu_update(&u);
@@ -651,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -743,6 +752,14 @@ out:
return flush;
}
+static int xen_pgd_walk(struct mm_struct *mm,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
+{
+ return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
/* If we're using split pte locks, then take the page's lock and
return a pointer to it. Otherwise return NULL. */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -840,13 +857,16 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
+ vm_unmap_aliases();
+
xen_mc_batch();
- if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
- /* re-enable interrupts for kmap_flush_unused */
+ if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
+ /* re-enable interrupts for flushing */
xen_mc_issue(0);
+
kmap_flush_unused();
- vm_unmap_aliases();
+
xen_mc_batch();
}
@@ -864,7 +884,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
#else /* CONFIG_X86_32 */
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is pinnable */
- xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+ xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
PT_PMD);
#endif
xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
@@ -981,11 +1001,11 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is unpinned */
- xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+ xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
PT_PMD);
#endif
- xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+ __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da613b1d..acd9b6705e0 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu)
alternatives_smp_switch(0);
}
-static void xen_play_dead(void)
+static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc2a55..9e1afae8461 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
void xen_mark_init_mm_pinned(void);
-void __init xen_setup_vcpu_info_placement(void);
+void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);