aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/head_32.S8
-rw-r--r--arch/x86/boot/compressed/head_64.S10
-rw-r--r--arch/x86/boot/copy.S40
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/boot/pmjump.S16
-rw-r--r--arch/x86/configs/i386_defconfig6
-rw-r--r--arch/x86/configs/x86_64_defconfig6
-rw-r--r--arch/x86/ia32/ia32_signal.c70
-rw-r--r--arch/x86/include/asm/io.h11
-rw-r--r--arch/x86/include/asm/iomap.h6
-rw-r--r--arch/x86/include/asm/irq_vectors.h2
-rw-r--r--arch/x86/include/asm/linkage.h64
-rw-r--r--arch/x86/include/asm/page_32_types.h2
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/page_types.h6
-rw-r--r--arch/x86/include/asm/pat.h3
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h6
-rw-r--r--arch/x86/include/asm/processor.h10
-rw-r--r--arch/x86/include/asm/syscalls.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h4
-rw-r--r--arch/x86/include/asm/uaccess_64.h21
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S4
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S2
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S4
-rw-r--r--arch/x86/kernel/alternative.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c6
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/x86/kernel/e820.c3
-rw-r--r--arch/x86/kernel/efi_stub_32.S3
-rw-r--r--arch/x86/kernel/efi_stub_64.S7
-rw-r--r--arch/x86/kernel/entry_32.S4
-rw-r--r--arch/x86/kernel/entry_64.S25
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S4
-rw-r--r--arch/x86/kernel/trampoline_32.S2
-rw-r--r--arch/x86/kernel/trampoline_64.S4
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/vmiclock_32.c3
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S2
-rw-r--r--arch/x86/lib/getuser.S2
-rw-r--r--arch/x86/mm/fault.c1078
-rw-r--r--arch/x86/mm/iomap_32.c58
-rw-r--r--arch/x86/mm/memtest.c156
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/pageattr.c7
-rw-r--r--arch/x86/mm/pat.c46
-rw-r--r--arch/x86/power/hibernate_asm_32.S2
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/vdso/vma.c4
-rw-r--r--arch/x86/xen/enlighten.c3
-rw-r--r--arch/x86/xen/xen-head.S2
62 files changed, 963 insertions, 818 deletions
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 29c5fbf0839..3a8a866fb2e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -25,14 +25,12 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
.section ".text.head","ax",@progbits
- .globl startup_32
-
-startup_32:
+ENTRY(startup_32)
cld
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments */
@@ -113,6 +111,8 @@ startup_32:
*/
leal relocated(%ebx), %eax
jmp *%eax
+ENDPROC(startup_32)
+
.section ".text"
relocated:
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1d5dff4123e..ed4a8294800 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -26,8 +26,8 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
+#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
@@ -35,9 +35,7 @@
.section ".text.head"
.code32
- .globl startup_32
-
-startup_32:
+ENTRY(startup_32)
cld
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments */
@@ -176,6 +174,7 @@ startup_32:
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
+ENDPROC(startup_32)
no_longmode:
/* This isn't an x86-64 CPU so hang */
@@ -295,7 +294,6 @@ relocated:
call decompress_kernel
popq %rsi
-
/*
* Jump to the decompressed kernel.
*/
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index ef50c84e8b4..11f272c6f5e 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -8,6 +8,8 @@
*
* ----------------------------------------------------------------------- */
+#include <linux/linkage.h>
+
/*
* Memory copy routines
*/
@@ -15,9 +17,7 @@
.code16gcc
.text
- .globl memcpy
- .type memcpy, @function
-memcpy:
+GLOBAL(memcpy)
pushw %si
pushw %di
movw %ax, %di
@@ -31,11 +31,9 @@ memcpy:
popw %di
popw %si
ret
- .size memcpy, .-memcpy
+ENDPROC(memcpy)
- .globl memset
- .type memset, @function
-memset:
+GLOBAL(memset)
pushw %di
movw %ax, %di
movzbl %dl, %eax
@@ -48,52 +46,42 @@ memset:
rep; stosb
popw %di
ret
- .size memset, .-memset
+ENDPROC(memset)
- .globl copy_from_fs
- .type copy_from_fs, @function
-copy_from_fs:
+GLOBAL(copy_from_fs)
pushw %ds
pushw %fs
popw %ds
call memcpy
popw %ds
ret
- .size copy_from_fs, .-copy_from_fs
+ENDPROC(copy_from_fs)
- .globl copy_to_fs
- .type copy_to_fs, @function
-copy_to_fs:
+GLOBAL(copy_to_fs)
pushw %es
pushw %fs
popw %es
call memcpy
popw %es
ret
- .size copy_to_fs, .-copy_to_fs
+ENDPROC(copy_to_fs)
#if 0 /* Not currently used, but can be enabled as needed */
-
- .globl copy_from_gs
- .type copy_from_gs, @function
-copy_from_gs:
+GLOBAL(copy_from_gs)
pushw %ds
pushw %gs
popw %ds
call memcpy
popw %ds
ret
- .size copy_from_gs, .-copy_from_gs
- .globl copy_to_gs
+ENDPROC(copy_from_gs)
- .type copy_to_gs, @function
-copy_to_gs:
+GLOBAL(copy_to_gs)
pushw %es
pushw %gs
popw %es
call memcpy
popw %es
ret
- .size copy_to_gs, .-copy_to_gs
-
+ENDPROC(copy_to_gs)
#endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b993062e9a5..7ccff4884a2 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -19,7 +19,7 @@
#include <linux/utsrelease.h>
#include <asm/boot.h>
#include <asm/e820.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/setup.h>
#include "boot.h"
#include "offsets.h"
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index 141b6e20ed3..019c17a7585 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -15,18 +15,15 @@
#include <asm/boot.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <linux/linkage.h>
.text
-
- .globl protected_mode_jump
- .type protected_mode_jump, @function
-
.code16
/*
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
*/
-protected_mode_jump:
+GLOBAL(protected_mode_jump)
movl %edx, %esi # Pointer to boot_params table
xorl %ebx, %ebx
@@ -47,12 +44,10 @@ protected_mode_jump:
.byte 0x66, 0xea # ljmpl opcode
2: .long in_pm32 # offset
.word __BOOT_CS # segment
-
- .size protected_mode_jump, .-protected_mode_jump
+ENDPROC(protected_mode_jump)
.code32
- .type in_pm32, @function
-in_pm32:
+GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds
movl %ecx, %es
@@ -78,5 +73,4 @@ in_pm32:
lldt %cx
jmpl *%eax # Jump to the 32-bit entrypoint
-
- .size in_pm32, .-in_pm32
+ENDPROC(in_pm32)
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 5c023f6f652..235b81d0f6f 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.29-rc4
-# Thu Feb 12 12:57:57 2009
+# Tue Feb 24 15:50:58 2009
#
# CONFIG_64BIT is not set
CONFIG_X86_32=y
@@ -266,7 +266,9 @@ CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-# CONFIG_X86_MCE is not set
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_NONFATAL=y
+CONFIG_X86_MCE_P4THERMAL=y
CONFIG_VM86=y
# CONFIG_TOSHIBA is not set
# CONFIG_I8K is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4157cc4a2bd..9fe5d212ab4 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.29-rc4
-# Thu Feb 12 12:57:29 2009
+# Tue Feb 24 15:44:16 2009
#
CONFIG_64BIT=y
# CONFIG_X86_32 is not set
@@ -266,7 +266,9 @@ CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_X86_IO_APIC=y
CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-# CONFIG_X86_MCE is not set
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
# CONFIG_I8K is not set
CONFIG_MICROCODE=y
CONFIG_MICROCODE_INTEL=y
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index dd77ac0cac4..588a7aa937e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -33,8 +33,6 @@
#include <asm/sigframe.h>
#include <asm/sys_ia32.h>
-#define DEBUG_SIG 0
-
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
@@ -190,42 +188,47 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
/*
* Do a signal return; undo the signal stack.
*/
+#define loadsegment_gs(v) load_gs_index(v)
+#define loadsegment_fs(v) loadsegment(fs, v)
+#define loadsegment_ds(v) loadsegment(ds, v)
+#define loadsegment_es(v) loadsegment(es, v)
+
+#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; })
+#define set_user_seg(seg, v) loadsegment_##seg(v)
+
#define COPY(x) { \
get_user_ex(regs->x, &sc->x); \
}
-#define COPY_SEG_CPL3(seg) { \
- unsigned short tmp; \
- get_user_ex(tmp, &sc->seg); \
- regs->seg = tmp | 3; \
-}
+#define GET_SEG(seg) ({ \
+ unsigned short tmp; \
+ get_user_ex(tmp, &sc->seg); \
+ tmp; \
+})
+
+#define COPY_SEG_CPL3(seg) do { \
+ regs->seg = GET_SEG(seg) | 3; \
+} while (0)
#define RELOAD_SEG(seg) { \
- unsigned int cur, pre; \
- get_user_ex(pre, &sc->seg); \
- savesegment(seg, cur); \
+ unsigned int pre = GET_SEG(seg); \
+ unsigned int cur = get_user_seg(seg); \
pre |= 3; \
if (pre != cur) \
- loadsegment(seg, pre); \
+ set_user_seg(seg, pre); \
}
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_ia32 __user *sc,
unsigned int *pax)
{
- unsigned int tmpflags, gs, oldgs, err = 0;
+ unsigned int tmpflags, err = 0;
void __user *buf;
u32 tmp;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
-#if DEBUG_SIG
- printk(KERN_DEBUG "SIG restore_sigcontext: "
- "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
- sc, sc->err, sc->ip, sc->cs, sc->flags);
-#endif
-
get_user_try {
/*
* Reload fs and gs if they have changed in the signal
@@ -233,12 +236,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
* the handler, but does not clobber them at least in the
* normal case.
*/
- get_user_ex(gs, &sc->gs);
- gs |= 3;
- savesegment(gs, oldgs);
- if (gs != oldgs)
- load_gs_index(gs);
-
+ RELOAD_SEG(gs);
RELOAD_SEG(fs);
RELOAD_SEG(ds);
RELOAD_SEG(es);
@@ -337,17 +335,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
void __user *fpstate,
struct pt_regs *regs, unsigned int mask)
{
- int tmp, err = 0;
+ int err = 0;
put_user_try {
- savesegment(gs, tmp);
- put_user_ex(tmp, (unsigned int __user *)&sc->gs);
- savesegment(fs, tmp);
- put_user_ex(tmp, (unsigned int __user *)&sc->fs);
- savesegment(ds, tmp);
- put_user_ex(tmp, (unsigned int __user *)&sc->ds);
- savesegment(es, tmp);
- put_user_ex(tmp, (unsigned int __user *)&sc->es);
+ put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs);
+ put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs);
+ put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds);
+ put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es);
put_user_ex(regs->di, &sc->di);
put_user_ex(regs->si, &sc->si);
@@ -488,11 +482,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
-#if DEBUG_SIG
- printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
-
return 0;
}
@@ -574,10 +563,5 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER32_CS;
regs->ss = __USER32_DS;
-#if DEBUG_SIG
- printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
-
return 0;
}
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 4f8e820cf38..683d0b4c00f 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -124,10 +124,15 @@ static inline void *phys_to_virt(phys_addr_t address)
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
+ * However, we truncate the address to unsigned int to avoid undesirable
+ * promitions in legacy drivers.
*/
-#define isa_virt_to_bus (unsigned long)virt_to_phys
-#define isa_page_to_bus page_to_phys
-#define isa_bus_to_virt phys_to_virt
+static inline unsigned int isa_virt_to_bus(volatile void *address)
+{
+ return (unsigned int)virt_to_phys(address);
+}
+#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page))
+#define isa_bus_to_virt phys_to_virt
/*
* However PCI ones are not necessarily 1:1 and therefore these interfaces
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index c1f06289b14..bd46495ff7d 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -23,6 +23,12 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+int
+reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot);
+
+void
+free_io_memtype(u64 base, unsigned long size);
+
void *
iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index b07278c55e9..8a285f356f8 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -128,7 +128,7 @@
#ifndef __ASSEMBLY__
static inline int invalid_vm86_irq(int irq)
{
- return irq < 3 || irq > 15;
+ return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
}
#endif
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 5d98d0b68ff..9320e2a8a26 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -52,70 +52,14 @@
#endif
+#define GLOBAL(name) \
+ .globl name; \
+ name:
+
#ifdef CONFIG_X86_ALIGNMENT_16
#define __ALIGN .align 16,0x90
#define __ALIGN_STR ".align 16,0x90"
#endif
-/*
- * to check ENTRY_X86/END_X86 and
- * KPROBE_ENTRY_X86/KPROBE_END_X86
- * unbalanced-missed-mixed appearance
- */
-#define __set_entry_x86 .set ENTRY_X86_IN, 0
-#define __unset_entry_x86 .set ENTRY_X86_IN, 1
-#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
-#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
-
-#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
-
-#define __check_entry_x86 \
- .ifdef ENTRY_X86_IN; \
- .ifeq ENTRY_X86_IN; \
- __macro_err_x86; \
- .abort; \
- .endif; \
- .endif
-
-#define __check_kprobe_x86 \
- .ifdef KPROBE_X86_IN; \
- .ifeq KPROBE_X86_IN; \
- __macro_err_x86; \
- .abort; \
- .endif; \
- .endif
-
-#define __check_entry_kprobe_x86 \
- __check_entry_x86; \
- __check_kprobe_x86
-
-#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
-
-#define ENTRY_X86(name) \
- __check_entry_kprobe_x86; \
- __set_entry_x86; \
- .globl name; \
- __ALIGN; \
- name:
-
-#define END_X86(name) \
- __unset_entry_x86; \
- __check_entry_kprobe_x86; \
- .size name, .-name
-
-#define KPROBE_ENTRY_X86(name) \
- __check_entry_kprobe_x86; \
- __set_kprobe_x86; \
- .pushsection .kprobes.text, "ax"; \
- .globl name; \
- __ALIGN; \
- name:
-
-#define KPROBE_END_X86(name) \
- __unset_kprobe_x86; \
- __check_entry_kprobe_x86; \
- .size name, .-name; \
- .popsection
-
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index b5486aaf36e..f1e4a79a6e4 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -33,12 +33,10 @@
/* 44=32+12, the limit we can fit into an unsigned long pfn */
#define __PHYSICAL_MASK_SHIFT 44
#define __VIRTUAL_MASK_SHIFT 32
-#define PAGETABLE_LEVELS 3
#else /* !CONFIG_X86_PAE */
#define __PHYSICAL_MASK_SHIFT 32
#define __VIRTUAL_MASK_SHIFT 32
-#define PAGETABLE_LEVELS 2
#endif /* CONFIG_X86_PAE */
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index bc73af3eda9..d38c91b7024 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,8 +1,6 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#define PAGETABLE_LEVELS 4
-
#define THREAD_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE - 1))
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 2c52ff76758..2d625da6603 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -16,12 +16,6 @@
(ie, 32-bit PAE). */
#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
-#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
-
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
-#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
-
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 9709fdff661..b0e70056838 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -15,4 +15,7 @@ extern int reserve_memtype(u64 start, u64 end,
unsigned long req_type, unsigned long *ret_type);
extern int free_memtype(u64 start, u64 end);
+extern int kernel_map_sync_memtype(u64 base, unsigned long size,
+ unsigned long flag);
+
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 09ae67efceb..daacc23e3fb 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -17,6 +17,7 @@ typedef union {
#endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0
+#define PAGETABLE_LEVELS 2
/*
* traditional i386 two-level paging structure:
@@ -25,6 +26,7 @@ typedef union {
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
+
/*
* the i386 is two-level, so we don't really have any
* PMD directory physically.
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index bcc89625ebe..1bd5876c864 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -24,6 +24,8 @@ typedef union {
#define SHARED_KERNEL_PMD 1
#endif
+#define PAGETABLE_LEVELS 3
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 2f59135c6f2..fbf42b8e038 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -18,6 +18,7 @@ typedef struct { pteval_t pte; } pte_t;
#endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0
+#define PAGETABLE_LEVELS 4
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 9dafe87be2d..4d258ad76a0 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -173,6 +173,12 @@
#include <linux/types.h>
+/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
+
+/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
+
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
typedef struct { pgdval_t pgd; } pgd_t;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index dabab1a19dd..c7a98f73821 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,7 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary);
#endif
#endif /* X86_64 */
-extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
@@ -862,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x)
* User space process size: 3GB (default).
*/
#define TASK_SIZE PAGE_OFFSET
+#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
@@ -921,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
/*
* User space process size. 47bits minus one guard page.
*/
-#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
@@ -930,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE64)
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE64)
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX TASK_SIZE64
+#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 258ef730aaa..7043408f690 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -82,7 +82,7 @@ asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
/* kernel/signal_64.c */
asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
struct pt_regs *);
-asmlinkage long sys_rt_sigreturn(struct pt_regs *);
+long sys_rt_sigreturn(struct pt_regs *);
/* kernel/sys_x86_64.c */
asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 5e06259e90e..a0ba6138697 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
}
static __always_inline unsigned long __copy_from_user_nocache(void *to,
- const void __user *from, unsigned long n)
+ const void __user *from, unsigned long n, unsigned long total)
{
might_fault();
if (__builtin_constant_p(n)) {
@@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
static __always_inline unsigned long
__copy_from_user_inatomic_nocache(void *to, const void __user *from,
- unsigned long n)
+ unsigned long n, unsigned long total)
{
return __copy_from_user_ll_nocache_nozero(to, from, n);
}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 84210c479fc..dcaa0404cf7 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -189,17 +189,28 @@ extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest);
static inline int __copy_from_user_nocache(void *dst, const void __user *src,
- unsigned size)
+ unsigned size, unsigned long total)
{
might_sleep();
- return __copy_user_nocache(dst, src, size, 1);
+ /*
+ * In practice this limit means that large file write()s
+ * which get chunked to 4K copies get handled via
+ * non-temporal stores here. Smaller writes get handled
+ * via regular __copy_from_user():
+ */
+ if (likely(total >= PAGE_SIZE))
+ return __copy_user_nocache(dst, src, size, 1);
+ else
+ return __copy_from_user(dst, src, size);
}
static inline int __copy_from_user_inatomic_nocache(void *dst,
- const void __user *src,
- unsigned size)
+ const void __user *src, unsigned size, unsigned total)
{
- return __copy_user_nocache(dst, src, size, 0);
+ if (likely(total >= PAGE_SIZE))
+ return __copy_user_nocache(dst, src, size, 0);
+ else
+ return __copy_from_user_inatomic(dst, src, size);
}
unsigned long
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 3355973b12a..580b4e29601 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -3,8 +3,8 @@
*/
#include <asm/segment.h>
#include <asm/msr-index.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <asm/page_types.h>
+#include <asm/pgtable_types.h>
#include <asm/processor-flags.h>
.code16
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index a12e6a9fb65..8ded418b059 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,7 +1,7 @@
.section .text.page_aligned
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 96258d9dc97..8ea5164cbd0 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -1,8 +1,8 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
+#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a84ac7b570e..6907b8e85d5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -498,12 +498,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
*/
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
- unsigned long flags;
char *vaddr;
int nr_pages = 2;
struct page *pages[2];
int i;
+ might_sleep();
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -517,9 +517,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
nr_pages = 1;
vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
BUG_ON(!vaddr);
- local_irq_save(flags);
+ local_irq_disable();
memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- local_irq_restore(flags);
+ local_irq_enable();
vunmap(vaddr);
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d8664..41ab3f064cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
}
/* Enable Enhanced PowerSaver */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
- if (!(val & 1 << 16)) {
- val |= 1 << 16;
+ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
wrmsrl(MSR_IA32_MISC_ENABLE, val);
/* Can be locked at 0 */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
- if (!(val & 1 << 16)) {
+ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
return -ENODEV;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f08998278a3..c9f1fdc0283 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
enable it if not. */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- if (!(l & (1<<16))) {
- l |= (1<<16);
+ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+ l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
wrmsr(MSR_IA32_MISC_ENABLE, l, h);
/* check to see if it stuck */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- if (!(l & (1<<16))) {
+ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
printk(KERN_INFO PFX
"couldn't enable Enhanced SpeedStep\n");
return -ENODEV;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7aeef1d327b..25c559ba8d5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -146,10 +146,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
*/
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
- if ((lo & (1<<9)) == 0) {
+ if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
- lo |= (1<<9); /* Disable hw prefetching */
+ lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b7d78cdc0a..aa5e287c98e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -49,13 +49,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
- if ((l & (1 << 3)) && (h & APIC_DM_SMI)) {
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
return;
}
- if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+ if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
tm2 = 1;
if (h & APIC_VECTOR_MASK) {
@@ -73,7 +73,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce09f7..f53bdcbaf38 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
- if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
cpu);
return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
vendor_thermal_interrupt = intel_thermal_interrupt;
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e85826829cf..508bec1cee2 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -858,6 +858,9 @@ void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
*/
void __init reserve_early(u64 start, u64 end, char *name)
{
+ if (start >= end)
+ return;
+
drop_overlaps_that_are_ok(start, end);
__reserve_early(start, end, name, 0);
}
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
index ef00bb77d7e..fbe66e626c0 100644
--- a/arch/x86/kernel/efi_stub_32.S
+++ b/arch/x86/kernel/efi_stub_32.S
@@ -6,7 +6,7 @@
*/
#include <linux/linkage.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
@@ -113,6 +113,7 @@ ENTRY(efi_call_phys)
movl (%edx), %ecx
pushl %ecx
ret
+ENDPROC(efi_call_phys)
.previous
.data
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
index 99b47d48c9f..4c07ccab814 100644
--- a/arch/x86/kernel/efi_stub_64.S
+++ b/arch/x86/kernel/efi_stub_64.S
@@ -41,6 +41,7 @@ ENTRY(efi_call0)
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call0)
ENTRY(efi_call1)
SAVE_XMM
@@ -50,6 +51,7 @@ ENTRY(efi_call1)
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call1)
ENTRY(efi_call2)
SAVE_XMM
@@ -59,6 +61,7 @@ ENTRY(efi_call2)
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call2)
ENTRY(efi_call3)
SAVE_XMM
@@ -69,6 +72,7 @@ ENTRY(efi_call3)
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call3)
ENTRY(efi_call4)
SAVE_XMM
@@ -80,6 +84,7 @@ ENTRY(efi_call4)
addq $32, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call4)
ENTRY(efi_call5)
SAVE_XMM
@@ -92,6 +97,7 @@ ENTRY(efi_call5)
addq $48, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call5)
ENTRY(efi_call6)
SAVE_XMM
@@ -107,3 +113,4 @@ ENTRY(efi_call6)
addq $48, %rsp
RESTORE_XMM
ret
+ENDPROC(efi_call6)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index e9920683145..899e8938e79 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -47,7 +47,7 @@
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
@@ -1359,7 +1359,7 @@ nmi_espfix_stack:
CFI_ADJUST_CFA_OFFSET 4
pushl %esp
CFI_ADJUST_CFA_OFFSET 4
- addw $4, (%esp)
+ addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fbcf96b295f..83d1836b946 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -48,7 +48,7 @@
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>
@@ -77,20 +77,17 @@ ENTRY(ftrace_caller)
movq 8(%rbp), %rsi
subq $MCOUNT_INSN_SIZE, %rdi
-.globl ftrace_call
-ftrace_call:
+GLOBAL(ftrace_call)
call ftrace_stub
MCOUNT_RESTORE_FRAME
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
+GLOBAL(ftrace_graph_call)
jmp ftrace_stub
#endif
-.globl ftrace_stub
-ftrace_stub:
+GLOBAL(ftrace_stub)
retq
END(ftrace_caller)
@@ -110,8 +107,7 @@ ENTRY(mcount)
jnz ftrace_graph_caller
#endif
-.globl ftrace_stub
-ftrace_stub:
+GLOBAL(ftrace_stub)
retq
trace:
@@ -148,9 +144,7 @@ ENTRY(ftrace_graph_caller)
retq
END(ftrace_graph_caller)
-
-.globl return_to_handler
-return_to_handler:
+GLOBAL(return_to_handler)
subq $80, %rsp
movq %rax, (%rsp)
@@ -188,6 +182,7 @@ return_to_handler:
ENTRY(native_usergs_sysret64)
swapgs
sysretq
+ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
@@ -633,16 +628,14 @@ tracesys:
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
- .globl int_ret_from_sys_call
- .globl int_with_check
-int_ret_from_sys_call:
+GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
-int_with_check:
+GLOBAL(int_with_check)
LOCKDEP_SYS_EXIT_IRQ
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%edx
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2a0aad7718d..c32ca19d591 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -11,8 +11,8 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
+#include <asm/page_types.h>
+#include <asm/pgtable_types.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2e648e3a5ea..54b29bb24e7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -329,8 +329,6 @@ early_idt_ripmsg:
#endif /* CONFIG_EARLY_PRINTK */
.previous
-.balign PAGE_SIZE
-
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
ENTRY(name)
@@ -419,7 +417,7 @@ ENTRY(phys_base)
.section .bss, "aw", @nobits
.align L1_CACHE_BYTES
ENTRY(idt_table)
- .skip 256 * 16
+ .skip IDT_ENTRIES * 16
.section .bss.page_aligned, "aw", @nobits
.align PAGE_SIZE
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 37f420018a4..f5fc8c781a6 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one(
static void machine_kexec_prepare_page_tables(struct kimage *image)
{
void *control_page;
- pmd_t *pmd = 0;
+ pmd_t *pmd = NULL;
control_page = page_address(image->control_code_page);
#ifdef CONFIG_X86_PAE
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d2f7cd5b2c8..fb2159a5c81 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -268,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task)
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
- return TASK_SIZE64 - 7;
+ return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index a160f311972..2064d0aa8d2 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -7,7 +7,7 @@
*/
#include <linux/linkage.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index b0bbdd4829c..d32cfb27a47 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -7,10 +7,10 @@
*/
#include <linux/linkage.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
-#include <asm/pgtable.h>
+#include <asm/pgtable_types.h>
/*
* Must be relocatable PIC code callable as a C function
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index d8ccc3c6552..66d874e5404 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -29,7 +29,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 95a012a4664..cddfb8d386b 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,8 +25,8 @@
*/
#include <linux/linkage.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/pgtable_types.h>
+#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c8c0a7e530b..c05430ac1b4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -942,7 +942,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_BADSTK;
- info.si_addr = 0;
+ info.si_addr = NULL;
if (notify_die(DIE_TRAP, "iret exception",
regs, error_code, 32, SIGILL) == NOTIFY_STOP)
return;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 49b4cd6707f..33a788d5879 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -287,8 +287,7 @@ static struct clocksource clocksource_vmi;
static cycle_t read_real_cycles(void)
{
cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
- return ret >= clocksource_vmi.cycle_last ?
- ret : clocksource_vmi.cycle_last;
+ return max(ret, clocksource_vmi.cycle_last);
}
static struct clocksource clocksource_vmi = {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 3eba7f7bac0..0d860963f26 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -12,7 +12,7 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/cache.h>
#include <asm/boot.h>
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 087a7f2c639..fbfced6f680 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -6,7 +6,7 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/asm-offsets.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#undef i386 /* in case the preprocessor is a 32bit one */
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index ad374003742..51f1504cddd 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -28,7 +28,7 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 29644175490..a03b7279efa 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1,74 +1,79 @@
/*
* Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
+ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
*/
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mmiotrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/mmiotrace.h>
+#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
-#include <linux/bootmem.h> /* for max_low_pfn */
-#include <linux/vmalloc.h>
-#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/vt_kern.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/module.h>
#include <linux/kdebug.h>
+#include <linux/errno.h>
#include <linux/magic.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/tty.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm-generic/sections.h>
-#include <asm/system.h>
-#include <asm/desc.h>
-#include <asm/segment.h>
-#include <asm/pgalloc.h>
-#include <asm/smp.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/segment.h>
+#include <asm/system.h>
#include <asm/proto.h>
-#include <asm-generic/sections.h>
#include <asm/traps.h>
+#include <asm/desc.h>
/*
- * Page fault error code bits
- * bit 0 == 0 means no page found, 1 means protection fault
- * bit 1 == 0 means read, 1 means write
- * bit 2 == 0 means kernel, 1 means user-mode
- * bit 3 == 1 means use of reserved bit detected
- * bit 4 == 1 means fault was an instruction fetch
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
*/
-#define PF_PROT (1<<0)
-#define PF_WRITE (1<<1)
-#define PF_USER (1<<2)
-#define PF_RSVD (1<<3)
-#define PF_INSTR (1<<4)
+enum x86_pf_error_code {
+ PF_PROT = 1 << 0,
+ PF_WRITE = 1 << 1,
+ PF_USER = 1 << 2,
+ PF_RSVD = 1 << 3,
+ PF_INSTR = 1 << 4,
+};
+
+/*
+ * Returns 0 if mmiotrace is disabled, or if the fault is not
+ * handled by mmiotrace:
+ */
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
-#endif
return 0;
}
static inline int notify_page_fault(struct pt_regs *regs)
{
-#ifdef CONFIG_KPROBES
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
- if (!user_mode_vm(regs)) {
+ if (kprobes_built_in() && !user_mode_vm(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -76,29 +81,76 @@ static inline int notify_page_fault(struct pt_regs *regs)
}
return ret;
-#else
- return 0;
-#endif
}
/*
- * X86_32
- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
- * Check that here and ignore it.
+ * Prefetch quirks:
*
- * X86_64
- * Sometimes the CPU reports invalid exceptions on prefetch.
- * Check that here and ignore it.
+ * 32-bit mode:
*
- * Opcode checker based on code by Richard Brunner
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * 64-bit mode:
+ *
+ * Sometimes the CPU reports invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ *
+ * Opcode checker based on code by Richard Brunner.
*/
-static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
- unsigned long addr)
+static inline int
+check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
+ unsigned char opcode, int *prefetch)
{
+ unsigned char instr_hi = opcode & 0xf0;
+ unsigned char instr_lo = opcode & 0x0f;
+
+ switch (instr_hi) {
+ case 0x20:
+ case 0x30:
+ /*
+ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+ * In X86_64 long mode, the CPU will signal invalid
+ * opcode if some of these prefixes are present so
+ * X86_64 will never get here anyway
+ */
+ return ((instr_lo & 7) == 0x6);
+#ifdef CONFIG_X86_64
+ case 0x40:
+ /*
+ * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+ * Need to figure out under what instruction mode the
+ * instruction was issued. Could check the LDT for lm,
+ * but for now it's good enough to assume that long
+ * mode only uses well known segments or kernel.
+ */
+ return (!user_mode(regs)) || (regs->cs == __USER_CS);
+#endif
+ case 0x60:
+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
+ return (instr_lo & 0xC) == 0x4;
+ case 0xF0:
+ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
+ return !instr_lo || (instr_lo>>1) == 1;
+ case 0x00:
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
+ if (probe_kernel_address(instr, opcode))
+ return 0;
+
+ *prefetch = (instr_lo == 0xF) &&
+ (opcode == 0x0D || opcode == 0x18);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static int
+is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
+{
+ unsigned char *max_instr;
unsigned char *instr;
- int scan_more = 1;
int prefetch = 0;
- unsigned char *max_instr;
/*
* If it was a exec (instruction fetch) fault on NX page, then
@@ -107,106 +159,170 @@ static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
if (error_code & PF_INSTR)
return 0;
- instr = (unsigned char *)convert_ip_to_linear(current, regs);
+ instr = (void *)convert_ip_to_linear(current, regs);
max_instr = instr + 15;
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
return 0;
- while (scan_more && instr < max_instr) {
+ while (instr < max_instr) {
unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
if (probe_kernel_address(instr, opcode))
break;
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
instr++;
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /*
- * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
- * In X86_64 long mode, the CPU will signal invalid
- * opcode if some of these prefixes are present so
- * X86_64 will never get here anyway
- */
- scan_more = ((instr_lo & 7) == 0x6);
- break;
-#ifdef CONFIG_X86_64
- case 0x40:
- /*
- * In AMD64 long mode 0x40..0x4F are valid REX prefixes
- * Need to figure out under what instruction mode the
- * instruction was issued. Could check the LDT for lm,
- * but for now it's good enough to assume that long
- * mode only uses well known segments or kernel.
- */
- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
- break;
-#endif
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
-
- if (probe_kernel_address(instr, opcode))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
+ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
break;
- default:
- scan_more = 0;
- break;
- }
}
return prefetch;
}
-static void force_sig_info_fault(int si_signo, int si_code,
- unsigned long address, struct task_struct *tsk)
+static void
+force_sig_info_fault(int si_signo, int si_code, unsigned long address,
+ struct task_struct *tsk)
{
siginfo_t info;
- info.si_signo = si_signo;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void __user *)address;
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = (void __user *)address;
+
force_sig_info(si_signo, &info, tsk);
}
-#ifdef CONFIG_X86_64
-static int bad_address(void *p)
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+#ifdef CONFIG_X86_32
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
- unsigned long dummy;
- return probe_kernel_address((unsigned long *)p, dummy);
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ /*
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
+ * and redundant with the set_pmd() on non-PAE. As would
+ * set_pud.
+ */
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+
+ if (!pmd_present(*pmd)) {
+ set_pmd(pmd, *pmd_k);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ }
+
+ return pmd_k;
+}
+
+void vmalloc_sync_all(void)
+{
+ unsigned long address;
+
+ if (SHARED_KERNEL_PMD)
+ return;
+
+ for (address = VMALLOC_START & PMD_MASK;
+ address >= TASK_SIZE && address < FIXADDR_TOP;
+ address += PMD_SIZE) {
+
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (!vmalloc_sync_one(page_address(page), address))
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
+ * 32-bit:
+ *
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Did it hit the DOS screen memory VA from vm86 mode?
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+ unsigned long bit;
+
+ if (!v8086_mode(regs))
+ return;
+
+ bit = (address - 0xA0000) >> PAGE_SHIFT;
+ if (bit < 32)
+ tsk->thread.screen_bitmap |= 1 << bit;
}
-#endif
static void dump_pagetable(unsigned long address)
{
-#ifdef CONFIG_X86_32
__typeof__(pte_val(__pte(0))) page;
page = read_cr3();
page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+
#ifdef CONFIG_X86_PAE
printk("*pdpt = %016Lx ", page);
if ((page >> PAGE_SHIFT) < max_low_pfn
&& page & _PAGE_PRESENT) {
page &= PAGE_MASK;
page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
+ & (PTRS_PER_PMD - 1)];
printk(KERN_CONT "*pde = %016Lx ", page);
page &= ~_PAGE_NX;
}
@@ -218,19 +334,145 @@ static void dump_pagetable(unsigned long address)
* We must not directly access the pte in the highpte
* case if the page table is located in highmem.
* And let's rather not kmap-atomic the pte, just in case
- * it's allocated already.
+ * it's allocated already:
*/
if ((page >> PAGE_SHIFT) < max_low_pfn
&& (page & _PAGE_PRESENT)
&& !(page & _PAGE_PSE)) {
+
page &= PAGE_MASK;
page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
+ & (PTRS_PER_PTE - 1)];
printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
}
printk("\n");
-#else /* CONFIG_X86_64 */
+}
+
+#else /* CONFIG_X86_64: */
+
+void vmalloc_sync_all(void)
+{
+ unsigned long address;
+
+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+ address += PGDIR_SIZE) {
+
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
+ * 64-bit:
+ *
+ * Handle a fault on the vmalloc area
+ *
+ * This assumes no large pages in there.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ pgd_t *pgd, *pgd_ref;
+ pud_t *pud, *pud_ref;
+ pmd_t *pmd, *pmd_ref;
+ pte_t *pte, *pte_ref;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Copy kernel mappings over when needed. This can also
+ * happen within a race in page table update. In the later
+ * case just flush:
+ */
+ pgd = pgd_offset(current->active_mm, address);
+ pgd_ref = pgd_offset_k(address);
+ if (pgd_none(*pgd_ref))
+ return -1;
+
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+ /*
+ * Below here mismatches are bugs because these lower tables
+ * are shared:
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_ref = pud_offset(pgd_ref, address);
+ if (pud_none(*pud_ref))
+ return -1;
+
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ BUG();
+
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ BUG();
+
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
+
+ pte = pte_offset_kernel(pmd, address);
+
+ /*
+ * Don't use pte_page here, because the mappings can point
+ * outside mem_map, and the NUMA hash lookup cannot handle
+ * that:
+ */
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+ BUG();
+
+ return 0;
+}
+
+static const char errata93_warning[] =
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+KERN_ERR "******* Please consider a BIOS update.\n"
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+
+/*
+ * No vm86 mode in 64-bit mode:
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+}
+
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+static void dump_pagetable(unsigned long address)
+{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -239,102 +481,77 @@ static void dump_pagetable(unsigned long address)
pgd = (pgd_t *)read_cr3();
pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
+
pgd += pgd_index(address);
- if (bad_address(pgd)) goto bad;
+ if (bad_address(pgd))
+ goto bad;
+
printk("PGD %lx ", pgd_val(*pgd));
- if (!pgd_present(*pgd)) goto ret;
+
+ if (!pgd_present(*pgd))
+ goto out;
pud = pud_offset(pgd, address);
- if (bad_address(pud)) goto bad;
+ if (bad_address(pud))
+ goto bad;
+
printk("PUD %lx ", pud_val(*pud));
if (!pud_present(*pud) || pud_large(*pud))
- goto ret;
+ goto out;
pmd = pmd_offset(pud, address);
- if (bad_address(pmd)) goto bad;
+ if (bad_address(pmd))
+ goto bad;
+
printk("PMD %lx ", pmd_val(*pmd));
- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
+ if (!pmd_present(*pmd) || pmd_large(*pmd))
+ goto out;
pte = pte_offset_kernel(pmd, address);
- if (bad_address(pte)) goto bad;
+ if (bad_address(pte))
+ goto bad;
+
printk("PTE %lx", pte_val(*pte));
-ret:
+out:
printk("\n");
return;
bad:
printk("BAD\n");
-#endif
}
-#ifdef CONFIG_X86_32
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-{
- unsigned index = pgd_index(address);
- pgd_t *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
-
- pgd += index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- return NULL;
+#endif /* CONFIG_X86_64 */
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
-
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- return NULL;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
- if (!pmd_present(*pmd)) {
- set_pmd(pmd, *pmd_k);
- arch_flush_lazy_mmu_mode();
- } else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
- return pmd_k;
-}
-#endif
-
-#ifdef CONFIG_X86_64
-static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-#endif
-
-/* Workaround for K8 erratum #93 & buggy BIOS.
- BIOS SMM functions are required to use a specific workaround
- to avoid corruption of the 64bit RIP register on C stepping K8.
- A lot of BIOS that didn't get tested properly miss this.
- The OS sees this as a page fault with the upper 32bits of RIP cleared.
- Try to work around it here.
- Note we only handle faults in kernel here.
- Does nothing for X86_32
+/*
+ * Workaround for K8 erratum #93 & buggy BIOS.
+ *
+ * BIOS SMM functions are required to use a specific workaround
+ * to avoid corruption of the 64bit RIP register on C stepping K8.
+ *
+ * A lot of BIOS that didn't get tested properly miss this.
+ *
+ * The OS sees this as a page fault with the upper 32bits of RIP cleared.
+ * Try to work around it here.
+ *
+ * Note we only handle faults in kernel here.
+ * Does nothing on 32-bit.
*/
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
- static int warned;
+ static int once;
+
if (address != regs->ip)
return 0;
+
if ((address >> 32) != 0)
return 0;
+
address |= 0xffffffffUL << 32;
if ((address >= (u64)_stext && address <= (u64)_etext) ||
(address >= MODULES_VADDR && address <= MODULES_END)) {
- if (!warned) {
+ if (!once) {
printk(errata93_warning);
- warned = 1;
+ once = 1;
}
regs->ip = address;
return 1;
@@ -344,16 +561,17 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
}
/*
- * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
- * addresses >4GB. We catch this in the page fault handler because these
- * addresses are not reachable. Just detect this case and return. Any code
+ * Work around K8 erratum #100 K8 in compat mode occasionally jumps
+ * to illegal addresses >4GB.
+ *
+ * We catch this in the page fault handler because these addresses
+ * are not reachable. Just detect this case and return. Any code
* segment in LDT is compatibility mode.
*/
static int is_errata100(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
- (address >> 32))
+ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32))
return 1;
#endif
return 0;
@@ -363,8 +581,9 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
unsigned long nr;
+
/*
- * Pentium F0 0F C7 C8 bug workaround.
+ * Pentium F0 0F C7 C8 bug workaround:
*/
if (boot_cpu_data.f00f_bug) {
nr = (address - idt_descr.address) >> 3;
@@ -378,80 +597,87 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
return 0;
}
-static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
+static const char nx_warning[] = KERN_CRIT
+"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+
+static void
+show_fault_oops(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
-#ifdef CONFIG_X86_32
if (!oops_may_print())
return;
-#endif
-#ifdef CONFIG_X86_PAE
if (error_code & PF_INSTR) {
unsigned int level;
+
pte_t *pte = lookup_address(address, &level);
if (pte && pte_present(*pte) && !pte_exec(*pte))
- printk(KERN_CRIT "kernel tried to execute "
- "NX-protected page - exploit attempt? "
- "(uid: %d)\n", current_uid());
+ printk(nx_warning, current_uid());
}
-#endif
printk(KERN_ALERT "BUG: unable to handle kernel ");
if (address < PAGE_SIZE)
printk(KERN_CONT "NULL pointer dereference");
else
printk(KERN_CONT "paging request");
+
printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:");
printk_address(regs->ip, 1);
+
dump_pagetable(address);
}
-#ifdef CONFIG_X86_64
-static noinline void pgtable_bad(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static noinline void
+pgtable_bad(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
- unsigned long flags = oops_begin();
- int sig = SIGKILL;
- struct task_struct *tsk = current;
+ struct task_struct *tsk;
+ unsigned long flags;
+ int sig;
+
+ flags = oops_begin();
+ tsk = current;
+ sig = SIGKILL;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
tsk->comm, address);
dump_pagetable(address);
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
+
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
+
if (__die("Bad pagetable", regs, error_code))
sig = 0;
+
oops_end(flags, regs, sig);
}
-#endif
-static noinline void no_context(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static noinline void
+no_context(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
struct task_struct *tsk = current;
unsigned long *stackend;
-
-#ifdef CONFIG_X86_64
unsigned long flags;
int sig;
-#endif
- /* Are we prepared to handle this kernel fault? */
+ /* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
- * X86_32
- * Valid to do another page fault here, because if this fault
- * had been triggered by is_prefetch fixup_exception would have
- * handled it.
+ * 32-bit:
+ *
+ * Valid to do another page fault here, because if this fault
+ * had been triggered by is_prefetch fixup_exception would have
+ * handled it.
+ *
+ * 64-bit:
*
- * X86_64
- * Hall of shame of CPU/BIOS bugs.
+ * Hall of shame of CPU/BIOS bugs.
*/
if (is_prefetch(regs, error_code, address))
return;
@@ -461,54 +687,70 @@ static noinline void no_context(struct pt_regs *regs,
/*
* Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
+ * terminate things with extreme prejudice:
*/
-#ifdef CONFIG_X86_32
- bust_spinlocks(1);
-#else
flags = oops_begin();
-#endif
show_fault_oops(regs, error_code, address);
- stackend = end_of_stack(tsk);
+ stackend = end_of_stack(tsk);
if (*stackend != STACK_END_MAGIC)
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
-#ifdef CONFIG_X86_32
- die("Oops", regs, error_code);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-#else
sig = SIGKILL;
if (__die("Oops", regs, error_code))
sig = 0;
+
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
+
oops_end(flags, regs, sig);
-#endif
}
-static void __bad_area_nosemaphore(struct pt_regs *regs,
- unsigned long error_code, unsigned long address,
- int si_code)
+/*
+ * Print out info about fatal segfaults, if the show_unhandled_signals
+ * sysctl is set:
+ */
+static inline void
+show_signal_msg(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, struct task_struct *tsk)
+{
+ if (!unhandled_signal(tsk, SIGSEGV))
+ return;
+
+ if (!printk_ratelimit())
+ return;
+
+ printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+ tsk->comm, task_pid_nr(tsk), address,
+ (void *)regs->ip, (void *)regs->sp, error_code);
+
+ print_vma_addr(KERN_CONT " in ", regs->ip);
+
+ printk(KERN_CONT "\n");
+}
+
+static void
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, int si_code)
{
struct task_struct *tsk = current;
/* User mode accesses just cause a SIGSEGV */
if (error_code & PF_USER) {
/*
- * It's possible to have interrupts off here.
+ * It's possible to have interrupts off here:
*/
local_irq_enable();
/*
* Valid to do another page fault here because this one came
- * from user space.
+ * from user space:
*/
if (is_prefetch(regs, error_code, address))
return;
@@ -516,22 +758,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs,
if (is_errata100(regs, address))
return;
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
- printk_ratelimit()) {
- printk(
- "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address,
- (void *) regs->ip, (void *) regs->sp, error_code);
- print_vma_addr(" in ", regs->ip);
- printk("\n");
- }
+ if (unlikely(show_unhandled_signals))
+ show_signal_msg(regs, error_code, address, tsk);
+
+ /* Kernel addresses are always protection faults: */
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.trap_no = 14;
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+
return;
}
@@ -541,15 +777,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs,
no_context(regs, error_code, address);
}
-static noinline void bad_area_nosemaphore(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static noinline void
+bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}
-static void __bad_area(struct pt_regs *regs,
- unsigned long error_code, unsigned long address,
- int si_code)
+static void
+__bad_area(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, int si_code)
{
struct mm_struct *mm = current->mm;
@@ -562,67 +799,75 @@ static void __bad_area(struct pt_regs *regs,
__bad_area_nosemaphore(regs, error_code, address, si_code);
}
-static noinline void bad_area(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static noinline void
+bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
__bad_area(regs, error_code, address, SEGV_MAPERR);
}
-static noinline void bad_area_access_error(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static noinline void
+bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
__bad_area(regs, error_code, address, SEGV_ACCERR);
}
/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
-static void out_of_memory(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static void
+out_of_memory(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
/*
* We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
+ * (which will retry the fault, or kill us if we got oom-killed):
*/
up_read(&current->mm->mmap_sem);
+
pagefault_out_of_memory();
}
-static void do_sigbus(struct pt_regs *regs,
- unsigned long error_code, unsigned long address)
+static void
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
up_read(&mm->mmap_sem);
- /* Kernel mode? Handle exceptions or die */
+ /* Kernel mode? Handle exceptions or die: */
if (!(error_code & PF_USER))
no_context(regs, error_code, address);
-#ifdef CONFIG_X86_32
- /* User space => ok to do another page fault */
+
+ /* User-space => ok to do another page fault: */
if (is_prefetch(regs, error_code, address))
return;
-#endif
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
+
+ tsk->thread.cr2 = address;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 14;
+
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
-static noinline void mm_fault_error(struct pt_regs *regs,
- unsigned long error_code, unsigned long address, unsigned int fault)
+static noinline void
+mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, unsigned int fault)
{
- if (fault & VM_FAULT_OOM)
+ if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
- else if (fault & VM_FAULT_SIGBUS)
- do_sigbus(regs, error_code, address);
- else
- BUG();
+ } else {
+ if (fault & VM_FAULT_SIGBUS)
+ do_sigbus(regs, error_code, address);
+ else
+ BUG();
+ }
}
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
if ((error_code & PF_WRITE) && !pte_write(*pte))
return 0;
+
if ((error_code & PF_INSTR) && !pte_exec(*pte))
return 0;
@@ -630,21 +875,25 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
}
/*
- * Handle a spurious fault caused by a stale TLB entry. This allows
- * us to lazily refresh the TLB when increasing the permissions of a
- * kernel page (RO -> RW or NX -> X). Doing it eagerly is very
- * expensive since that implies doing a full cross-processor TLB
- * flush, even if no stale TLB entries exist on other processors.
+ * Handle a spurious fault caused by a stale TLB entry.
+ *
+ * This allows us to lazily refresh the TLB when increasing the
+ * permissions of a kernel page (RO -> RW or NX -> X). Doing it
+ * eagerly is very expensive since that implies doing a full
+ * cross-processor TLB flush, even if no stale TLB entries exist
+ * on other processors.
+ *
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
-static noinline int spurious_fault(unsigned long error_code,
- unsigned long address)
+static noinline int
+spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ int ret;
/* Reserved-bit violation or user access to kernel space? */
if (error_code & (PF_USER | PF_RSVD))
@@ -672,123 +921,46 @@ static noinline int spurious_fault(unsigned long error_code,
if (!pte_present(*pte))
return 0;
- return spurious_fault_check(error_code, pte);
-}
-
-/*
- * X86_32
- * Handle a fault on the vmalloc or module mapping area
- *
- * X86_64
- * Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
-#ifdef CONFIG_X86_32
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
-
- /* Make sure we are in vmalloc area */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
+ ret = spurious_fault_check(error_code, pte);
+ if (!ret)
+ return 0;
/*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
+ * Make sure we have permissions in PMD.
+ * If not, then there's a bug in the page tables:
*/
- pgd_paddr = read_cr3();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
- return 0;
-#else
- pgd_t *pgd, *pgd_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
+ ret = spurious_fault_check(error_code, (pte_t *) pmd);
+ WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
- /* Make sure we are in vmalloc area */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /* Copy kernel mappings over when needed. This can also
- happen within a race in page table update. In the later
- case just flush. */
-
- pgd = pgd_offset(current->active_mm, address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
- return -1;
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
- /* Below here mismatches are bugs because these lower tables
- are shared */
-
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
- if (pud_none(*pud_ref))
- return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
- BUG();
- pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
- return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
- BUG();
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
- pte = pte_offset_kernel(pmd, address);
- /* Don't use pte_page here, because the mappings can point
- outside mem_map, and the NUMA hash lookup cannot handle
- that. */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
- return 0;
-#endif
+ return ret;
}
int show_unhandled_signals = 1;
-static inline int access_error(unsigned long error_code, int write,
- struct vm_area_struct *vma)
+static inline int
+access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
{
if (write) {
- /* write, present and write, not present */
+ /* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
- } else if (unlikely(error_code & PF_PROT)) {
- /* read, present */
- return 1;
- } else {
- /* read, not present */
- if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
- return 1;
+ return 0;
}
+ /* read, present: */
+ if (unlikely(error_code & PF_PROT))
+ return 1;
+
+ /* read, not present: */
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ return 1;
+
return 0;
}
static int fault_in_kernel_space(unsigned long address)
{
-#ifdef CONFIG_X86_32
- return address >= TASK_SIZE;
-#else /* !CONFIG_X86_32 */
- return address >= TASK_SIZE64;
-#endif /* CONFIG_X86_32 */
+ return address >= TASK_SIZE_MAX;
}
/*
@@ -796,23 +968,22 @@ static int fault_in_kernel_space(unsigned long address)
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
-#ifdef CONFIG_X86_64
-asmlinkage
-#endif
-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- unsigned long address;
+ struct vm_area_struct *vma;
struct task_struct *tsk;
+ unsigned long address;
struct mm_struct *mm;
- struct vm_area_struct *vma;
int write;
int fault;
tsk = current;
mm = tsk->mm;
+
prefetchw(&mm->mmap_sem);
- /* get the address */
+ /* Get the faulting address: */
address = read_cr2();
if (unlikely(kmmio_fault(regs, address)))
@@ -836,22 +1007,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
vmalloc_fault(address) >= 0)
return;
- /* Can handle a stale RO->RW TLB */
+ /* Can handle a stale RO->RW TLB: */
if (spurious_fault(error_code, address))
return;
- /* kprobes don't want to hook the spurious faults. */
+ /* kprobes don't want to hook the spurious faults: */
if (notify_page_fault(regs))
return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
+ * fault we could otherwise deadlock:
*/
bad_area_nosemaphore(regs, error_code, address);
+
return;
}
- /* kprobes don't want to hook the spurious faults. */
+ /* kprobes don't want to hook the spurious faults: */
if (unlikely(notify_page_fault(regs)))
return;
/*
@@ -859,22 +1031,22 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
* vmalloc fault has been handled.
*
* User-mode registers count as a user access even for any
- * potential system fault or CPU buglet.
+ * potential system fault or CPU buglet:
*/
if (user_mode_vm(regs)) {
local_irq_enable();
error_code |= PF_USER;
- } else if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
+ } else {
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+ }
-#ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
-#endif
/*
- * If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault.
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
*/
if (unlikely(in_atomic() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
@@ -883,19 +1055,19 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
/*
* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
+ * addresses in user space. All other faults represent errors in
+ * the kernel and should generate an OOPS. Unfortunately, in the
+ * case of an erroneous fault occurring in a code path which already
+ * holds mmap_sem we will deadlock attempting to validate the fault
+ * against the address space. Luckily the kernel only validly
+ * references user space from well defined areas of code, which are
+ * listed in the exceptions table.
*
* As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
+ * the source reference check when there is a possibility of a
+ * deadlock. Attempt to lock the address space, if we cannot we then
+ * validate the source. If this is invalid we can skip the address
+ * space check, thus avoiding the deadlock:
*/
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
if ((error_code & PF_USER) == 0 &&
@@ -906,8 +1078,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
down_read(&mm->mmap_sem);
} else {
/*
- * The above down_read_trylock() might have succeeded in which
- * case we'll have missed the might_sleep() from down_read().
+ * The above down_read_trylock() might have succeeded in
+ * which case we'll have missed the might_sleep() from
+ * down_read():
*/
might_sleep();
}
@@ -927,7 +1100,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
- * and pusha to work. ("enter $65535,$31" pushes
+ * and pusha to work. ("enter $65535, $31" pushes
* 32 pointers and then decrements %sp by 65535.)
*/
if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
@@ -946,6 +1119,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
good_area:
write = error_code & PF_WRITE;
+
if (unlikely(access_error(error_code, write, vma))) {
bad_area_access_error(regs, error_code, address);
return;
@@ -954,75 +1128,21 @@ good_area:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
- * the fault.
+ * the fault:
*/
fault = handle_mm_fault(mm, vma, address, write);
+
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
return;
}
+
if (fault & VM_FAULT_MAJOR)
tsk->maj_flt++;
else
tsk->min_flt++;
-#ifdef CONFIG_X86_32
- /*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
- if (v8086_mode(regs)) {
- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
- if (bit < 32)
- tsk->thread.screen_bitmap |= 1 << bit;
- }
-#endif
- up_read(&mm->mmap_sem);
-}
-
-DEFINE_SPINLOCK(pgd_lock);
-LIST_HEAD(pgd_list);
+ check_v8086_mode(regs, address, tsk);
-void vmalloc_sync_all(void)
-{
- unsigned long address;
-
-#ifdef CONFIG_X86_32
- if (SHARED_KERNEL_PMD)
- return;
-
- for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE && address < FIXADDR_TOP;
- address += PMD_SIZE) {
- unsigned long flags;
- struct page *page;
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- if (!vmalloc_sync_one(page_address(page),
- address))
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-#else /* CONFIG_X86_64 */
- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
- address += PGDIR_SIZE) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-#endif
+ up_read(&mm->mmap_sem);
}
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ca53224fc56..d5e28424622 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -20,6 +20,64 @@
#include <asm/pat.h>
#include <linux/module.h>
+#ifdef CONFIG_X86_PAE
+static int
+is_io_mapping_possible(resource_size_t base, unsigned long size)
+{
+ return 1;
+}
+#else
+static int
+is_io_mapping_possible(resource_size_t base, unsigned long size)
+{
+ /* There is no way to map greater than 1 << 32 address without PAE */
+ if (base + size > 0x100000000ULL)
+ return 0;
+
+ return 1;
+}
+#endif
+
+int
+reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot)
+{
+ unsigned long ret_flag;
+
+ if (!is_io_mapping_possible(base, size))
+ goto out_err;
+
+ if (!pat_enabled) {
+ *prot = pgprot_noncached(PAGE_KERNEL);
+ return 0;
+ }
+
+ if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &ret_flag))
+ goto out_err;
+
+ if (ret_flag == _PAGE_CACHE_WB)
+ goto out_free;
+
+ if (kernel_map_sync_memtype(base, size, ret_flag))
+ goto out_free;
+
+ *prot = __pgprot(__PAGE_KERNEL | ret_flag);
+ return 0;
+
+out_free:
+ free_memtype(base, base + size);
+out_err:
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(reserve_io_memtype_wc);
+
+void
+free_io_memtype(u64 base, unsigned long size)
+{
+ if (pat_enabled)
+ free_memtype(base, base + size);
+}
+EXPORT_SYMBOL_GPL(free_io_memtype);
+
/* Map 'pfn' using fixed map 'type' and protections 'prot'
*/
void *
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 9cab18b0b85..0bcd7883d03 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,44 +9,44 @@
#include <asm/e820.h>
-static void __init memtest(unsigned long start_phys, unsigned long size,
- unsigned pattern)
+static u64 patterns[] __initdata = {
+ 0,
+ 0xffffffffffffffffULL,
+ 0x5555555555555555ULL,
+ 0xaaaaaaaaaaaaaaaaULL,
+ 0x1111111111111111ULL,
+ 0x2222222222222222ULL,
+ 0x4444444444444444ULL,
+ 0x8888888888888888ULL,
+ 0x3333333333333333ULL,
+ 0x6666666666666666ULL,
+ 0x9999999999999999ULL,
+ 0xccccccccccccccccULL,
+ 0x7777777777777777ULL,
+ 0xbbbbbbbbbbbbbbbbULL,
+ 0xddddddddddddddddULL,
+ 0xeeeeeeeeeeeeeeeeULL,
+ 0x7a6c7258554e494cULL, /* yeah ;-) */
+};
+
+static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
{
- unsigned long i;
- unsigned long *start;
- unsigned long start_bad;
- unsigned long last_bad;
- unsigned long val;
- unsigned long start_phys_aligned;
- unsigned long count;
- unsigned long incr;
-
- switch (pattern) {
- case 0:
- val = 0UL;
- break;
- case 1:
- val = -1UL;
- break;
- case 2:
-#ifdef CONFIG_X86_64
- val = 0x5555555555555555UL;
-#else
- val = 0x55555555UL;
-#endif
- break;
- case 3:
-#ifdef CONFIG_X86_64
- val = 0xaaaaaaaaaaaaaaaaUL;
-#else
- val = 0xaaaaaaaaUL;
-#endif
- break;
- default:
- return;
- }
+ printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n",
+ (unsigned long long) pattern,
+ (unsigned long long) start_bad,
+ (unsigned long long) end_bad);
+ reserve_early(start_bad, end_bad, "BAD RAM");
+}
- incr = sizeof(unsigned long);
+static void __init memtest(u64 pattern, u64 start_phys, u64 size)
+{
+ u64 i, count;
+ u64 *start;
+ u64 start_bad, last_bad;
+ u64 start_phys_aligned;
+ size_t incr;
+
+ incr = sizeof(pattern);
start_phys_aligned = ALIGN(start_phys, incr);
count = (size - (start_phys_aligned - start_phys))/incr;
start = __va(start_phys_aligned);
@@ -54,25 +54,42 @@ static void __init memtest(unsigned long start_phys, unsigned long size,
last_bad = 0;
for (i = 0; i < count; i++)
- start[i] = val;
+ start[i] = pattern;
for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
- if (*start != val) {
- if (start_phys_aligned == last_bad + incr) {
- last_bad += incr;
- } else {
- if (start_bad) {
- printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
- val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad + incr, "BAD RAM");
- }
- start_bad = last_bad = start_phys_aligned;
- }
+ if (*start == pattern)
+ continue;
+ if (start_phys_aligned == last_bad + incr) {
+ last_bad += incr;
+ continue;
}
+ if (start_bad)
+ reserve_bad_mem(pattern, start_bad, last_bad + incr);
+ start_bad = last_bad = start_phys_aligned;
}
- if (start_bad) {
- printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
- val, start_bad, last_bad + incr);
- reserve_early(start_bad, last_bad + incr, "BAD RAM");
+ if (start_bad)
+ reserve_bad_mem(pattern, start_bad, last_bad + incr);
+}
+
+static void __init do_one_pass(u64 pattern, u64 start, u64 end)
+{
+ u64 size = 0;
+
+ while (start < end) {
+ start = find_e820_area_size(start, &size, 1);
+
+ /* done ? */
+ if (start >= end)
+ break;
+ if (start + size > end)
+ size = end - start;
+
+ printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
+ (unsigned long long) start,
+ (unsigned long long) start + size,
+ (unsigned long long) cpu_to_be64(pattern));
+ memtest(pattern, start, size);
+
+ start += size;
}
}
@@ -90,33 +107,22 @@ early_param("memtest", parse_memtest);
void __init early_memtest(unsigned long start, unsigned long end)
{
- u64 t_start, t_size;
- unsigned pattern;
+ unsigned int i;
+ unsigned int idx = 0;
if (!memtest_pattern)
return;
- printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
- for (pattern = 0; pattern < memtest_pattern; pattern++) {
- t_start = start;
- t_size = 0;
- while (t_start < end) {
- t_start = find_e820_area_size(t_start, &t_size, 1);
-
- /* done ? */
- if (t_start >= end)
- break;
- if (t_start + t_size > end)
- t_size = end - t_start;
-
- printk(KERN_CONT "\n %010llx - %010llx pattern %d",
- (unsigned long long)t_start,
- (unsigned long long)t_start + t_size, pattern);
-
- memtest(t_start, t_size, pattern);
+ printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
+ for (i = 0; i < memtest_pattern; i++) {
+ idx = i % ARRAY_SIZE(patterns);
+ do_one_pass(patterns[idx], start, end);
+ }
- t_start += t_size;
- }
+ if (idx > 0) {
+ printk(KERN_INFO "early_memtest: wipe out "
+ "test pattern from memory\n");
+ /* additional test with pattern 0 will do this */
+ do_one_pass(0, start, end);
}
- printk(KERN_CONT "\n");
}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d1f7439d173..3957cd6d645 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -194,7 +194,7 @@ void *alloc_remap(int nid, unsigned long size)
size = ALIGN(size, L1_CACHE_BYTES);
if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
- return 0;
+ return NULL;
node_remap_alloc_vaddr[nid] += size;
memset(allocation, 0, size);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7be47d1a97e..8253bc97587 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -482,6 +482,13 @@ static int split_large_page(pte_t *kpte, unsigned long address)
pbase = (pte_t *)page_address(base);
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ /*
+ * If we ever want to utilize the PAT bit, we need to
+ * update this function to make sure it's converted from
+ * bit 12 to bit 7 when we cross from the 2MB level to
+ * the 4K level:
+ */
+ WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE);
#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 05f9aef6818..fdfedb65d45 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -634,6 +634,33 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
}
/*
+ * Change the memory type for the physial address range in kernel identity
+ * mapping space if that range is a part of identity map.
+ */
+int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
+{
+ unsigned long id_sz;
+
+ if (!pat_enabled || base >= __pa(high_memory))
+ return 0;
+
+ id_sz = (__pa(high_memory) < base + size) ?
+ __pa(high_memory) - base :
+ size;
+
+ if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
+ printk(KERN_INFO
+ "%s:%d ioremap_change_attr failed %s "
+ "for %Lx-%Lx\n",
+ current->comm, current->pid,
+ cattr_name(flags),
+ base, (unsigned long long)(base + size));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
* Internal interface to reserve a range of physical memory with prot.
* Reserved non RAM regions only and after successful reserve_memtype,
* this func also keeps identity mapping (if any) in sync with this new prot.
@@ -642,7 +669,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
int strict_prot)
{
int is_ram = 0;
- int id_sz, ret;
+ int ret;
unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
@@ -679,23 +706,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
flags);
}
- /* Need to keep identity mapping in sync */
- if (paddr >= __pa(high_memory))
- return 0;
-
- id_sz = (__pa(high_memory) < paddr + size) ?
- __pa(high_memory) - paddr :
- size;
-
- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
+ if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
free_memtype(paddr, paddr + size);
- printk(KERN_ERR
- "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
- "for %Lx-%Lx\n",
- current->comm, current->pid,
- cattr_name(flags),
- (unsigned long long)paddr,
- (unsigned long long)(paddr + size));
return -EINVAL;
}
return 0;
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index d1e9b53f9d3..b641388d828 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -8,7 +8,7 @@
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 000415947d9..9356547d8c0 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -18,7 +18,7 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 9c98cc6ba97..7133cdf9098 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
unsigned long addr, end;
unsigned offset;
end = (start + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE64)
- end = TASK_SIZE64;
+ if (end >= TASK_SIZE_MAX)
+ end = TASK_SIZE_MAX;
end -= len;
/* This loses some more bits than a modulo, but is cheaper */
offset = get_random_int() & (PTRS_PER_PTE - 1);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 86497d5f44c..c52f4034c7f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -940,6 +940,9 @@ asmlinkage void __init xen_start_kernel(void)
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+ local_irq_disable();
+ early_boot_irqs_off();
+
xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 63d49a523ed..1a5ff24e29c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -8,7 +8,7 @@
#include <asm/boot.h>
#include <asm/asm.h>
-#include <asm/page.h>
+#include <asm/page_types.h>
#include <xen/interface/elfnote.h>
#include <asm/xen/interface.h>