aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/Makefile18
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/boot/cpu.c26
-rw-r--r--arch/x86/boot/mkcpustr.c49
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c10
-rw-r--r--arch/x86/kernel/cpu/cpu.h9
-rw-r--r--arch/x86/kernel/cpu/feature_names.c83
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c107
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c16
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h4
-rw-r--r--arch/x86/kernel/cpu/proc.c74
-rw-r--r--arch/x86/kernel/cpuid.c52
-rw-r--r--arch/x86/kernel/efi.c57
-rw-r--r--arch/x86/kernel/efi_64.c22
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/ldt.c3
-rw-r--r--arch/x86/kernel/msr.c14
-rw-r--r--arch/x86/kernel/pci-gart_64.c5
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/setup_64.c76
-rw-r--r--arch/x86/kernel/test_nx.c12
-rw-r--r--arch/x86/kernel/trampoline_32.S7
-rw-r--r--arch/x86/kernel/trampoline_64.S3
-rw-r--r--arch/x86/kernel/vmi_32.c6
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/lib/bitops_32.c2
-rw-r--r--arch/x86/lib/bitops_64.c2
-rw-r--r--arch/x86/lib/mmx_32.c31
-rw-r--r--arch/x86/lib/usercopy_32.c12
-rw-r--r--arch/x86/lib/usercopy_64.c12
-rw-r--r--arch/x86/mm/fault.c34
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/init_64.c49
-rw-r--r--arch/x86/mm/ioremap.c41
-rw-r--r--arch/x86/mm/numa_64.c7
-rw-r--r--arch/x86/mm/pageattr-test.c3
-rw-r--r--arch/x86/mm/pageattr.c400
-rw-r--r--arch/x86/mm/pgtable_32.c61
-rw-r--r--arch/x86/pci/numa.c52
45 files changed, 713 insertions, 689 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7109037bdf7..59eef1c7fda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,6 +18,8 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select HAVE_OPROFILE
+ select HAVE_KPROBES
config GENERIC_LOCKBREAK
def_bool n
@@ -106,10 +108,6 @@ config GENERIC_TIME_VSYSCALL
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
-config ARCH_SUPPORTS_OPROFILE
- bool
- default y
-
select HAVE_KVM
config ARCH_HIBERNATION_POSSIBLE
@@ -204,8 +202,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also the <file:Documentation/smp.txt>,
- <file:Documentation/i386/IO-APIC.txt>,
+ See also <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
@@ -309,6 +306,7 @@ config X86_RDC321X
select M486
select X86_REBOOTFIXUPS
select GENERIC_GPIO
+ select LEDS_CLASS
select LEDS_GPIO
help
This option is needed for RDC R-321x system-on-chip, also known
@@ -1597,8 +1595,6 @@ source "drivers/firmware/Kconfig"
source "fs/Kconfig"
-source "kernel/Kconfig.instrumentation"
-
source "arch/x86/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8978e98bed5..364865b1b08 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
LDFLAGS := -m elf_$(UTS_MACHINE)
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
# Speed up the build
KBUILD_CFLAGS += -pipe
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 349b81a39c4..f88458e83ef 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
#RAMDISK := -DRAMDISK=512
targets := vmlinux.bin setup.bin setup.elf zImage bzImage
-subdir- := compressed
+subdir- := compressed
setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
@@ -43,9 +43,17 @@ setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
-hostprogs-y := tools/build
+hostprogs-y := mkcpustr tools/build
-HOSTCFLAGS_build.o := $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR $@
+ cmd_cpustr = $(obj)/mkcpustr > $@
+targets += cpustr.h
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+ $(call if_changed,cpustr)
# ---------------------------------------------------------------------------
@@ -80,6 +88,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
@@ -90,7 +99,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_setup.bin := -O binary
-
$(obj)/setup.bin: $(obj)/setup.elf FORCE
$(call if_changed,objcopy)
@@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
-FDARGS =
+FDARGS =
# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
FDINITRD =
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fe24ceabd90..d2b9f3bb87c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
$(call if_changed,ld)
@:
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1ccb38a7f0d..e8657b98c90 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:
#ifdef CONFIG_RELOCATABLE
movl %ebp, %ebx
- addl $(LARGE_PAGE_SIZE -1), %ebx
- andl $LARGE_PAGE_MASK, %ebx
+ addl $(PMD_PAGE_SIZE -1), %ebx
+ andl $PMD_PAGE_MASK, %ebx
#else
movl $CONFIG_PHYSICAL_START, %ebx
#endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
leaq startup_32(%rip) /* - $startup_32 */, %rbp
- addq $(LARGE_PAGE_SIZE - 1), %rbp
- andq $LARGE_PAGE_MASK, %rbp
+ addq $(PMD_PAGE_SIZE - 1), %rbp
+ andq $PMD_PAGE_MASK, %rbp
movq %rbp, %rbx
#else
movq $CONFIG_PHYSICAL_START, %rbp
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 2a5c32da585..00e19edd852 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,7 +1,7 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2.
@@ -9,7 +9,7 @@
* ----------------------------------------------------------------------- */
/*
- * arch/i386/boot/cpu.c
+ * arch/x86/boot/cpu.c
*
* Check for obligatory CPU features and abort if the features are not
* present.
@@ -19,6 +19,8 @@
#include "bitops.h"
#include <asm/cpufeature.h>
+#include "cpustr.h"
+
static char *cpu_name(int level)
{
static char buf[6];
@@ -35,6 +37,7 @@ int validate_cpu(void)
{
u32 *err_flags;
int cpu_level, req_level;
+ const unsigned char *msg_strs;
check_cpu(&cpu_level, &req_level, &err_flags);
@@ -51,13 +54,26 @@ int validate_cpu(void)
puts("This kernel requires the following features "
"not present on the CPU:\n");
+ msg_strs = (const unsigned char *)x86_cap_strs;
+
for (i = 0; i < NCAPINTS; i++) {
u32 e = err_flags[i];
for (j = 0; j < 32; j++) {
- if (e & 1)
- printf("%d:%d ", i, j);
-
+ int n = (i << 5)+j;
+ if (*msg_strs < n) {
+ /* Skip to the next string */
+ do {
+ msg_strs++;
+ } while (*msg_strs);
+ msg_strs++;
+ }
+ if (e & 1) {
+ if (*msg_strs == n && msg_strs[1])
+ printf("%s ", msg_strs+1);
+ else
+ printf("%d:%d ", i, j);
+ }
e >>= 1;
}
}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 00000000000..bbe76953bae
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../kernel/cpu/feature_names.c"
+
+#if NCAPFLAGS > 8
+# error "Need to adjust the boot code handling of CPUID strings"
+#endif
+
+int main(void)
+{
+ int i;
+ const char *str;
+
+ printf("static const char x86_cap_strs[] = \n");
+
+ for (i = 0; i < NCAPINTS*32; i++) {
+ str = x86_cap_flags[i];
+
+ if (i == NCAPINTS*32-1) {
+ /* The last entry must be unconditional; this
+ also consumes the compiler-added null character */
+ if (!str)
+ str = "";
+ printf("\t\"\\x%02x\"\"%s\"\n", i, str);
+ } else if (str) {
+ printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+ "\t\"\\x%02x\"\"%s\\0\"\n"
+ "#endif\n",
+ i >> 5, i & 31, i, str);
+ }
+ }
+ printf("\t;\n");
+ return 0;
+}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6f813009d44..21dc1a061bf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_PCI) += early-quirks.o
-obj-$(CONFIG_APM) += apm_32.o
+apm-y := apm_32.o
+obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o
obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o
@@ -74,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR
obj-y += pcspeaker.o
endif
-obj-$(CONFIG_SCx200) += scx200_32.o
+obj-$(CONFIG_SCx200) += scx200.o
+scx200-y += scx200_32.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfdb2f3bd76..a0c4d7c5dbd 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,6 +3,7 @@
#
obj-y := intel_cacheinfo.o addon_cpuid_features.o
+obj-y += feature_names.o
obj-$(CONFIG_X86_32) += common.o proc.o bugs.o
obj-$(CONFIG_X86_32) += amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b7b2142b58e..d9313d9adce 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
* They will insert themselves into the cpu_devs structure.
* Then, when cpu_init() is called, we can just iterate over that array.
*/
-
-extern int intel_cpu_init(void);
-extern int cyrix_init_cpu(void);
-extern int nsc_init_cpu(void);
-extern int amd_init_cpu(void);
-extern int centaur_init_cpu(void);
-extern int transmeta_init_cpu(void);
-extern int nexgen_init_cpu(void);
-extern int umc_init_cpu(void);
-
void __init early_cpu_init(void)
{
intel_cpu_init();
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ad6527a5beb..e0b38c33d84 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c);
extern void early_init_intel(struct cpuinfo_x86 *c);
extern void early_init_amd(struct cpuinfo_x86 *c);
+/* Specific CPU type init functions */
+int intel_cpu_init(void);
+int amd_init_cpu(void);
+int cyrix_init_cpu(void);
+int nsc_init_cpu(void);
+int centaur_init_cpu(void);
+int transmeta_init_cpu(void);
+int nexgen_init_cpu(void);
+int umc_init_cpu(void);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
new file mode 100644
index 00000000000..ee975ac6bbc
--- /dev/null
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -0,0 +1,83 @@
+/*
+ * Strings for the various x86 capability flags.
+ *
+ * This file must not contain any executable code.
+ */
+
+#include "asm/cpufeature.h"
+
+/*
+ * These flag bits must match the definitions in <asm/cpufeature.h>.
+ * NULL means this bit is undefined or reserved; either way it doesn't
+ * have meaning as far as Linux is concerned. Note that it's important
+ * to realize there is a difference between this table and CPUID -- if
+ * applications want to get the raw CPUID data, they should access
+ * /dev/cpu/<cpu_nr>/cpuid instead.
+ */
+const char * const x86_cap_flags[NCAPINTS*32] = {
+ /* Intel-defined */
+ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+ /* AMD-defined */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+ NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+ "3dnowext", "3dnow",
+
+ /* Transmeta-defined */
+ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Other (Linux-defined) */
+ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+ NULL, NULL, NULL, NULL,
+ "constant_tsc", "up", NULL, "arch_perfmon",
+ "pebs", "bts", NULL, NULL,
+ "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Intel-defined (#2) */
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+ NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* VIA/Cyrix/Centaur-defined */
+ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+ "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* AMD-defined (#2) */
+ "lahf_lm", "cmp_legacy", "svm", "extapic",
+ "cr8_legacy", "abm", "sse4a", "misalignsse",
+ "3dnowprefetch", "osvw", "ibs", "sse5",
+ "skinit", "wdt", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* Auxiliary (Linux-defined) */
+ "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
+const char *const x86_power_flags[32] = {
+ "ts", /* temperature sensor */
+ "fid", /* frequency id control */
+ "vid", /* voltage id control */
+ "ttp", /* thermal trip */
+ "tm",
+ "stc",
+ "100mhzsteps",
+ "hwpstate",
+ "", /* tsc invariant mapped to constant_tsc */
+ /* nothing */
+};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1c372b018d..fae31ce747b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/ds.h>
+#include <asm/bugs.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 8e139c70f88..ff14c320040 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -7,8 +7,6 @@
#include <asm/processor-flags.h>
#include "mtrr.h"
-int arr3_protected;
-
static void
cyrix_get_arr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
@@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
case 4:
return replace_reg;
case 3:
- if (arr3_protected)
- break;
case 2:
case 1:
case 0:
@@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
} else {
for (i = 0; i < 7; i++) {
cyrix_get_arr(i, &lbase, &lsize, &ltype);
- if ((i == 3) && arr3_protected)
- continue;
if (lsize == 0)
return i;
}
@@ -260,107 +254,6 @@ static void cyrix_set_all(void)
post_set();
}
-#if 0
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- * if it is set, write a warning message: ARR3 cannot be changed!
- * (it cannot be changed until the next processor reset)
- * if it is reset, then we can change it, set all the needed bits:
- * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- * - disable access to SMM memory (CCR1 bit 2 reset)
- * - disable SMM mode (CCR1 bit 1 reset)
- * - disable write protection of ARR3 (CCR6 bit 1 reset)
- * - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
- struct set_mtrr_context ctxt;
- unsigned char ccr[7];
- int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
- int i;
-#endif
-
- /* flush cache and enable MAPEN */
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- /* Save all CCRs locally */
- ccr[0] = getCx86(CX86_CCR0);
- ccr[1] = getCx86(CX86_CCR1);
- ccr[2] = getCx86(CX86_CCR2);
- ccr[3] = ctxt.ccr3;
- ccr[4] = getCx86(CX86_CCR4);
- ccr[5] = getCx86(CX86_CCR5);
- ccr[6] = getCx86(CX86_CCR6);
-
- if (ccr[3] & 1) {
- ccrc[3] = 1;
- arr3_protected = 1;
- } else {
- /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
- * access to SMM memory through ARR3 (bit 7).
- */
- if (ccr[1] & 0x80) {
- ccr[1] &= 0x7f;
- ccrc[1] |= 0x80;
- }
- if (ccr[1] & 0x04) {
- ccr[1] &= 0xfb;
- ccrc[1] |= 0x04;
- }
- if (ccr[1] & 0x02) {
- ccr[1] &= 0xfd;
- ccrc[1] |= 0x02;
- }
- arr3_protected = 0;
- if (ccr[6] & 0x02) {
- ccr[6] &= 0xfd;
- ccrc[6] = 1; /* Disable write protection of ARR3 */
- setCx86(CX86_CCR6, ccr[6]);
- }
- /* Disable ARR3. This is safe now that we disabled SMM. */
- /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
- }
- /* If we changed CCR1 in memory, change it in the processor, too. */
- if (ccrc[1])
- setCx86(CX86_CCR1, ccr[1]);
-
- /* Enable ARR usage by the processor */
- if (!(ccr[5] & 0x20)) {
- ccr[5] |= 0x20;
- ccrc[5] = 1;
- setCx86(CX86_CCR5, ccr[5]);
- }
-#ifdef CONFIG_SMP
- for (i = 0; i < 7; i++)
- ccr_state[i] = ccr[i];
- for (i = 0; i < 8; i++)
- cyrix_get_arr(i,
- &arr_state[i].base, &arr_state[i].size,
- &arr_state[i].type);
-#endif
-
- set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */
-
- if (ccrc[5])
- printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
- if (ccrc[3])
- printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
- if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
- if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
- if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
- if (ccrc[6])
- printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
-}
-#endif
-
static struct mtrr_ops cyrix_mtrr_ops = {
.vendor = X86_VENDOR_CYRIX,
// .init = cyrix_arr_init,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 71591958265..1e27b69a7a0 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
-#ifndef CONFIG_X86_64
-extern int arr3_protected;
-#else
-#define arr3_protected 0
-#endif
-
void set_mtrr_ops(struct mtrr_ops * ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
@@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
goto out;
}
- if (is_cpu(CYRIX) && !use_intel()) {
- if ((reg == 3) && arr3_protected) {
- printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
- goto out;
- }
- }
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
@@ -566,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del);
* These should be called implicitly, but we can't yet until all the initcall
* stuff is done...
*/
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
static void __init init_ifs(void)
{
#ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index fb74a2c2081..2cc77eb6fea 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,7 @@ void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
+/* CPU specific mtrr init functions */
+int amd_init_mtrr(void);
+int cyrix_init_mtrr(void);
+int centaur_init_mtrr(void);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 02821326014..af11d31dce0 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -10,80 +10,6 @@
*/
static int show_cpuinfo(struct seq_file *m, void *v)
{
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static const char * const x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static const char * const x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* constant_tsc - moved to flags */
- /* nothing */
- };
struct cpuinfo_x86 *c = v;
int i, n = 0;
int fpu_exception;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a63432d800f..288e7a6598a 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -17,6 +17,10 @@
* and then read in chunks of 16 bytes. A larger size means multiple
* reads of consecutive levels.
*
+ * The lower 32 bits of the file position is used as the incoming %eax,
+ * and the upper 32 bits of the file position as the incoming %ecx,
+ * the latter intended for "counting" eax levels like eax=4.
+ *
* This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
* an SMP box will direct the access to CPU %d.
*/
@@ -43,35 +47,24 @@
static struct class *cpuid_class;
-struct cpuid_command {
- u32 reg;
- u32 *data;
+struct cpuid_regs {
+ u32 eax, ebx, ecx, edx;
};
static void cpuid_smp_cpuid(void *cmd_block)
{
- struct cpuid_command *cmd = cmd_block;
-
- cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
- &cmd->data[3]);
-}
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
- struct cpuid_command cmd;
-
- cmd.reg = reg;
- cmd.data = data;
+ struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ cpuid_count(cmd->eax, cmd->ecx,
+ &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
}
static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
+ struct inode *inode = file->f_mapping->host;
- lock_kernel();
-
+ mutex_lock(&inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
@@ -84,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
default:
ret = -EINVAL;
}
-
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return ret;
}
@@ -93,19 +85,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
size_t count, loff_t * ppos)
{
char __user *tmp = buf;
- u32 data[4];
- u32 reg = *ppos;
+ struct cpuid_regs cmd;
int cpu = iminor(file->f_path.dentry->d_inode);
+ u64 pos = *ppos;
if (count % 16)
return -EINVAL; /* Invalid chunk size */
for (; count; count -= 16) {
- do_cpuid(cpu, reg, data);
- if (copy_to_user(tmp, &data, 16))
+ cmd.eax = pos;
+ cmd.ecx = pos >> 32;
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ if (copy_to_user(tmp, &cmd, 16))
return -EFAULT;
tmp += 16;
- *ppos = reg++;
+ *ppos = ++pos;
}
return tmp - buf;
@@ -193,7 +187,7 @@ static int __init cpuid_init(void)
}
for_each_online_cpu(i) {
err = cpuid_device_create(i);
- if (err != 0)
+ if (err != 0)
goto out_class;
}
register_hotcpu_notifier(&cpuid_class_cpu_notifier);
@@ -208,7 +202,7 @@ out_class:
}
class_destroy(cpuid_class);
out_chrdev:
- unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+ unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
out:
return err;
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1411324a625..32dd62b36ff 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -379,11 +379,9 @@ void __init efi_init(void)
#endif
}
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static void __init runtime_code_page_mkexec(void)
{
efi_memory_desc_t *md;
- unsigned long end;
void *p;
if (!(__supported_pte_mask & _PAGE_NX))
@@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void)
/* Make EFI runtime service code area executable */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- if (md->type == EFI_RUNTIME_SERVICES_CODE &&
- (end >> PAGE_SHIFT) <= max_pfn_mapped) {
- set_memory_x(md->virt_addr, md->num_pages);
- set_memory_uc(md->virt_addr, md->num_pages);
- }
+
+ if (md->type != EFI_RUNTIME_SERVICES_CODE)
+ continue;
+
+ set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
}
- __flush_tlb_all();
}
-#else
-static inline void __init runtime_code_page_mkexec(void) { }
-#endif
/*
* This function will switch the EFI runtime services to virtual mode.
@@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void)
{
efi_memory_desc_t *md;
efi_status_t status;
- unsigned long end;
- void *p;
+ unsigned long size;
+ u64 end, systab;
+ void *p, *va;
efi.systab = NULL;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
- end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- if ((md->attribute & EFI_MEMORY_WB) &&
- ((end >> PAGE_SHIFT) <= max_pfn_mapped))
- md->virt_addr = (unsigned long)__va(md->phys_addr);
+
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ end = md->phys_addr + size;
+
+ if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+ va = __va(md->phys_addr);
else
- md->virt_addr = (unsigned long)
- efi_ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!md->virt_addr)
+ va = efi_ioremap(md->phys_addr, size);
+
+ if (md->attribute & EFI_MEMORY_WB)
+ set_memory_uc(md->virt_addr, size);
+
+ md->virt_addr = (u64) (unsigned long) va;
+
+ if (!va) {
printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
(unsigned long long)md->phys_addr);
- if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab < end))
- efi.systab = (efi_system_table_t *)(unsigned long)
- (md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab);
+ continue;
+ }
+
+ systab = (u64) (unsigned long) efi_phys.systab;
+ if (md->phys_addr <= systab && systab < end) {
+ systab += md->virt_addr - md->phys_addr;
+ efi.systab = (efi_system_table_t *) (unsigned long) systab;
+ }
}
BUG_ON(!efi.systab);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 674f2379480..09d5c233093 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start,
else
set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
__supported_pte_mask));
- if (level == 4)
- start = (start + PMD_SIZE) & PMD_MASK;
- else
+ if (level == PG_LEVEL_4K)
start = (start + PAGE_SIZE) & PAGE_MASK;
+ else
+ start = (start + PMD_SIZE) & PMD_MASK;
}
}
@@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void)
memmap.nr_map * memmap.desc_size);
}
-void __iomem * __init efi_ioremap(unsigned long offset,
- unsigned long size)
+void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
{
static unsigned pages_mapped;
- unsigned long last_addr;
unsigned i, pages;
- last_addr = offset + size - 1;
- offset &= PAGE_MASK;
- pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+ /* phys_addr and size must be page aligned */
+ if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+ return NULL;
+
+ pages = size >> PAGE_SHIFT;
if (pages_mapped + pages > MAX_EFI_IO_PAGES)
return NULL;
for (i = 0; i < pages; i++) {
__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
- offset, PAGE_KERNEL_EXEC_NOCACHE);
- offset += PAGE_SIZE;
+ phys_addr, PAGE_KERNEL);
+ phys_addr += PAGE_SIZE;
pages_mapped++;
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d5a7a36120..4f283ad215e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:
/* Is the address not 2M aligned? */
movq %rbp, %rax
- andl $~LARGE_PAGE_MASK, %eax
+ andl $~PMD_PAGE_MASK, %eax
testl %eax, %eax
jnz bad_address
@@ -88,7 +88,7 @@ startup_64:
/* Add an Identity mapping if I am above 1G */
leaq _text(%rip), %rdi
- andq $LARGE_PAGE_MASK, %rdi
+ andq $PMD_PAGE_MASK, %rdi
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8a7660c8394..0224c3637c7 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
- mincount = (mincount + 511) & (~511);
+ mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
+ (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
else
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index bd82850e651..af51ea8400b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
- *
- * Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -45,9 +45,10 @@ static struct class *msr_class;
static loff_t msr_seek(struct file *file, loff_t offset, int orig)
{
- loff_t ret = -EINVAL;
+ loff_t ret;
+ struct inode *inode = file->f_mapping->host;
- lock_kernel();
+ mutex_lock(&inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
case 1:
file->f_pos += offset;
ret = file->f_pos;
+ break;
+ default:
+ ret = -EINVAL;
}
- unlock_kernel();
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4d5cc718198..845cbecd68e 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -501,7 +501,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
}
a = aper + iommu_size;
- iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
+ iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
if (iommu_size < 64*1024*1024) {
printk(KERN_WARNING
@@ -731,7 +731,8 @@ void __init gart_iommu_init(void)
* the backing memory. The GART address is only used by PCI
* devices.
*/
- clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+ set_memory_np((unsigned long)__va(iommu_bus_base),
+ iommu_size >> PAGE_SHIFT);
/*
* Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 968371ab223..dabdbeff1f7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -251,7 +251,7 @@ void cpu_idle_wait(void)
* because it has nothing to do.
* Give all the remaining CPUS a kick.
*/
- smp_call_function_mask(map, do_nothing, 0, 0);
+ smp_call_function_mask(map, do_nothing, NULL, 0);
} while (!cpus_empty(map));
set_cpus_allowed(current, tmp);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 18df70c534b..c8939dfddfb 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1068,82 +1068,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
struct cpuinfo_x86 *c = v;
int cpu = 0, i;
- /*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned. Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
- static const char *const x86_cap_flags[] = {
- /* Intel-defined */
- "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
- "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
- "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
- /* AMD-defined */
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
- NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
- "3dnowext", "3dnow",
-
- /* Transmeta-defined */
- "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Other (Linux-defined) */
- "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
- NULL, NULL, NULL, NULL,
- "constant_tsc", "up", NULL, "arch_perfmon",
- "pebs", "bts", NULL, "sync_rdtsc",
- "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
- "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
- NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* VIA/Cyrix/Centaur-defined */
- NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* AMD-defined (#2) */
- "lahf_lm", "cmp_legacy", "svm", "extapic",
- "cr8_legacy", "abm", "sse4a", "misalignsse",
- "3dnowprefetch", "osvw", "ibs", "sse5",
- "skinit", "wdt", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
- /* Auxiliary (Linux-defined) */
- "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- };
- static const char *const x86_power_flags[] = {
- "ts", /* temperature sensor */
- "fid", /* frequency id control */
- "vid", /* voltage id control */
- "ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
- "", /* tsc invariant mapped to constant_tsc */
- /* nothing */
- };
-
-
#ifdef CONFIG_SMP
cpu = c->cpu_index;
#endif
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index ae0ef2e304c..36c100c323a 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/uaccess.h>
+#include <asm/asm.h>
extern int rodata_test_data;
@@ -89,16 +90,7 @@ static noinline int test_address(void *address)
"2: mov %[zero], %[rslt]\n"
" ret\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
-#ifdef CONFIG_X86_32
- " .long 0b\n"
- " .long 2b\n"
-#else
- " .quad 0b\n"
- " .quad 2b\n"
-#endif
- ".previous\n"
+ _ASM_EXTABLE(0b,2b)
: [rslt] "=r" (result)
: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 9bcc1c6aca3..64580679861 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -11,12 +11,7 @@
* trampoline page to make our stack and everything else
* is a mystery.
*
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
- * We jump into the boot/compressed/head.S code. So you'd
- * better be running a compressed kernel image or you
- * won't get very far.
+ * We jump into arch/x86/kernel/head_32.S.
*
* On entry to trampoline_data, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index e30b67c6a9f..4aedd0bcee4 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -10,9 +10,6 @@
* trampoline page to make our stack and everything else
* is a mystery.
*
- * In fact we don't actually need a stack so we don't
- * set one up.
- *
* On entry to trampoline_data, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
* and IP is zero. Thus, data addresses need to be absolute
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4525bc2c2e1..12affe1f9bc 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -220,21 +220,21 @@ static void vmi_set_tr(void)
static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
u32 *idt_entry = (u32 *)g;
- vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+ vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
}
static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
const void *desc, int type)
{
u32 *gdt_entry = (u32 *)desc;
- vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+ vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
}
static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
{
u32 *ldt_entry = (u32 *)desc;
- vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+ vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
}
static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c83e1c9b512..41962e793c0 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -53,5 +53,6 @@ config KVM_AMD
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig
+source drivers/virtio/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c
index afd0045595d..b6544045985 100644
--- a/arch/x86/lib/bitops_32.c
+++ b/arch/x86/lib/bitops_32.c
@@ -2,7 +2,7 @@
#include <linux/module.h>
/**
- * find_next_bit - find the first set bit in a memory region
+ * find_next_bit - find the next set bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
index 95b6d9639fb..0e8f491e6cc 100644
--- a/arch/x86/lib/bitops_64.c
+++ b/arch/x86/lib/bitops_64.c
@@ -58,7 +58,7 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size)
}
/**
- * find_next_zero_bit - find the first zero bit in a memory region
+ * find_next_zero_bit - find the next zero bit in a memory region
* @addr: The address to base the search on
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 28084d2e8dd..cc9b4a4450f 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -4,6 +4,7 @@
#include <linux/hardirq.h>
#include <linux/module.h>
+#include <asm/asm.h>
#include <asm/i387.h>
@@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
@@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
for(i=0; i<(4096-320)/64; i++)
@@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
@@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from) );
for(i=0; i<4096/64; i++)
@@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b, 3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: : "r" (from), "r" (to) : "memory");
from+=64;
to+=64;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9c4ffd5bedb..e849b9998b0 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -48,10 +48,7 @@ do { \
"3: movl %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -132,11 +129,8 @@ do { \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
+ _ASM_EXTABLE(1b,2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 893d43f838c..0c89d1bb028 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -31,10 +31,7 @@ do { \
"3: movq %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad 0b,3b\n" \
- ".previous" \
+ _ASM_EXTABLE(0b,3b) \
: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 0b,3b\n"
- " .quad 1b,2b\n"
- ".previous"
+ _ASM_EXTABLE(0b,3b)
+ _ASM_EXTABLE(1b,2b)
: [size8] "=c"(size), [dst] "=&D" (__d0)
: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
[zero] "r" (0UL), [eight] "r" (8UL));
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4440d0abf8..ad8b9733d6b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address)
pud = pud_offset(pgd, address);
if (bad_address(pud)) goto bad;
printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
+ if (!pud_present(*pud) || pud_large(*pud))
+ goto ret;
pmd = pmd_offset(pud, address);
if (bad_address(pmd)) goto bad;
@@ -508,6 +509,10 @@ static int vmalloc_fault(unsigned long address)
pmd_t *pmd, *pmd_ref;
pte_t *pte, *pte_ref;
+ /* Make sure we are in vmalloc area */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
/* Copy kernel mappings over when needed. This can also
happen within a race in page table update. In the later
case just flush. */
@@ -603,6 +608,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/
#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
+#else
+ if (unlikely(address >= TASK_SIZE64)) {
+#endif
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
vmalloc_fault(address) >= 0)
return;
@@ -618,6 +626,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
goto bad_area_nosemaphore;
}
+
+#ifdef CONFIG_X86_32
/* It's safe to allow irq's after cr2 has been saved and the vmalloc
fault has been handled. */
if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
@@ -630,28 +640,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (in_atomic() || !mm)
goto bad_area_nosemaphore;
#else /* CONFIG_X86_64 */
- if (unlikely(address >= TASK_SIZE64)) {
- /*
- * Don't check for the module range here: its PML4
- * is always initialized because it's shared with the main
- * kernel text. Only vmalloc may need PML4 syncups.
- */
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- ((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) >= 0)
- return;
- }
-
- /* Can handle a stale RO->RW TLB */
- if (spurious_fault(address, error_code))
- return;
-
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
if (likely(regs->flags & X86_EFLAGS_IF))
local_irq_enable();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f2f36f8dae5..d1bc04006d1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
+#include <asm/asm.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void)
"1: movb %1, %0 \n"
" xorl %2, %2 \n"
"2: \n"
- ".section __ex_table, \"a\"\n"
- " .align 4 \n"
- " .long 1b, 2b \n"
- ".previous \n"
+ _ASM_EXTABLE(1b,2b)
:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
"=q" (tmp_reg),
"=r" (flag)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index eabcaed76c2..3a98d6f724a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
int i = pmd_index(address);
for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
- unsigned long entry;
pmd_t *pmd = pmd_page + pmd_index(address);
if (address >= end) {
@@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
if (pmd_val(*pmd))
continue;
- entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
- entry &= __supported_pte_mask;
- set_pmd(pmd, __pmd(entry));
+ set_pte((pte_t *)pmd,
+ pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
}
}
@@ -435,49 +433,6 @@ void __init paging_init(void)
#endif
/*
- * Unmap a kernel mapping if it exists. This is useful to avoid
- * prefetches from the CPU leading to inconsistent cache lines.
- * address and size must be aligned to 2MB boundaries.
- * Does nothing when the mapping doesn't exist.
- */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
- unsigned long end = address + size;
-
- BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
-
- for (; address < end; address += LARGE_PAGE_SIZE) {
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
-
- if (pgd_none(*pgd))
- continue;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- continue;
-
- pmd = pmd_offset(pud, address);
- if (!pmd || pmd_none(*pmd))
- continue;
-
- if (!(pmd_val(*pmd) & _PAGE_PSE)) {
- /*
- * Could handle this, but it should not happen
- * currently:
- */
- printk(KERN_ERR "clear_kernel_mapping: "
- "mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
- }
- set_pmd(pmd, __pmd(0));
- }
- __flush_tlb_all();
-}
-
-/*
* Memory hotplug specific functions
*/
void online_page(struct page *page)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c004d94608f..ee6648fe6b1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr)
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
*/
-static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
enum ioremap_mode mode)
{
- unsigned long vaddr = (unsigned long)__va(paddr);
unsigned long nrpages = size >> PAGE_SHIFT;
- unsigned int level;
int err;
- /* No change for pages after the last mapping */
- if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
- return 0;
-
- /*
- * If there is no identity map for this address,
- * change_page_attr_addr is unnecessary
- */
- if (!lookup_address(vaddr, &level))
- return 0;
-
switch (mode) {
case IOR_MODE_UNCACHED:
default:
@@ -114,9 +101,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
enum ioremap_mode mode)
{
- void __iomem *addr;
+ unsigned long pfn, offset, last_addr, vaddr;
struct vm_struct *area;
- unsigned long offset, last_addr;
pgprot_t prot;
/* Don't allow wraparound or zero size */
@@ -133,9 +119,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
- (offset << PAGE_SHIFT) < last_addr; offset++) {
- if (page_is_ram(offset))
+ for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
+ (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+ if (page_is_ram(pfn) && pfn_valid(pfn) &&
+ !PageReserved(pfn_to_page(pfn)))
return NULL;
}
@@ -163,19 +150,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
if (!area)
return NULL;
area->phys_addr = phys_addr;
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, prot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+ vaddr = (unsigned long) area->addr;
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+ remove_vm_area((void *)(vaddr & PAGE_MASK));
return NULL;
}
- if (ioremap_change_attr(phys_addr, size, mode) < 0) {
- vunmap(addr);
+ if (ioremap_change_attr(vaddr, size, mode) < 0) {
+ vunmap(area->addr);
return NULL;
}
- return (void __iomem *) (offset + (char __iomem *)addr);
+ return (void __iomem *) (vaddr + offset);
}
/**
@@ -254,9 +240,6 @@ void iounmap(volatile void __iomem *addr)
return;
}
- /* Reset the direct mapping. Can block */
- ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
-
/* Finally remove it */
o = remove_vm_area((void *)addr);
BUG_ON(p != o || o == NULL);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a920d09b919..5a02bf4c91e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
if (node_data[nodeid] == NULL)
return;
nodedata_phys = __pa(node_data[nodeid]);
+ printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
+ nodedata_phys + pgdat_size - 1);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -225,12 +227,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
return;
}
bootmap_start = __pa(bootmap);
- Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT,
start_pfn, end_pfn);
+ printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
+ bootmap_start, bootmap_start + bootmap_size - 1,
+ bootmap_pages);
+
free_bootmem_with_active_regions(nodeid, end);
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 7573e786d2f..398f3a578dd 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -137,7 +137,8 @@ static __init int exercise_pageattr(void)
for (k = 0; k < len[i]; k++) {
pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
- if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+ if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 ||
+ !(pte_val(*pte) & _PAGE_PRESENT)) {
addr[i] = 0;
break;
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd65e51..bb55a78dcd6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,17 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+ unsigned long vaddr;
+ pgprot_t mask_set;
+ pgprot_t mask_clr;
+ int numpages;
+ int flushtlb;
+};
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -52,21 +63,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
static void __cpa_flush_all(void *arg)
{
+ unsigned long cache = (unsigned long)arg;
+
/*
* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
__flush_tlb_all();
- if (boot_cpu_data.x86_model >= 4)
+ if (cache && boot_cpu_data.x86_model >= 4)
wbinvd();
}
-static void cpa_flush_all(void)
+static void cpa_flush_all(unsigned long cache)
{
BUG_ON(irqs_disabled());
- on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+ on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
}
static void __cpa_flush_range(void *arg)
@@ -79,7 +92,7 @@ static void __cpa_flush_range(void *arg)
__flush_tlb_all();
}
-static void cpa_flush_range(unsigned long start, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
unsigned int i, level;
unsigned long addr;
@@ -89,6 +102,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+ if (!cache)
+ return;
+
/*
* We only need to flush on one CPU,
* clflush is a MESI-coherent instruction that
@@ -101,11 +117,27 @@ static void cpa_flush_range(unsigned long start, int numpages)
/*
* Only flush present addresses:
*/
- if (pte && pte_present(*pte))
+ if (pte && (pte_val(*pte) & _PAGE_PRESENT))
clflush_cache_range((void *) addr, PAGE_SIZE);
}
}
+#define HIGH_MAP_START __START_KERNEL_map
+#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+
+/*
+ * Converts a virtual address to a X86-64 highmap address
+ */
+static unsigned long virt_to_highmap(void *address)
+{
+#ifdef CONFIG_X86_64
+ return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
+#else
+ return (unsigned long)address;
+#endif
+}
+
/*
* Certain areas of memory on x86 require very specific protection flags,
* for example the BIOS area or kernel text. Callers don't always get this
@@ -129,12 +161,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
*/
if (within(address, (unsigned long)_text, (unsigned long)_etext))
pgprot_val(forbidden) |= _PAGE_NX;
+ /*
+ * Do the same for the x86-64 high kernel mapping
+ */
+ if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+ pgprot_val(forbidden) |= _PAGE_NX;
+
#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
pgprot_val(forbidden) |= _PAGE_RW;
+ /*
+ * Do the same for the x86-64 high kernel mapping
+ */
+ if (within(address, virt_to_highmap(__start_rodata),
+ virt_to_highmap(__end_rodata)))
+ pgprot_val(forbidden) |= _PAGE_RW;
#endif
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -142,6 +186,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
return prot;
}
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
pte_t *lookup_address(unsigned long address, int *level)
{
pgd_t *pgd = pgd_offset_k(address);
@@ -152,21 +204,31 @@ pte_t *lookup_address(unsigned long address, int *level)
if (pgd_none(*pgd))
return NULL;
+
pud = pud_offset(pgd, address);
if (pud_none(*pud))
return NULL;
+
+ *level = PG_LEVEL_1G;
+ if (pud_large(*pud) || !pud_present(*pud))
+ return (pte_t *)pud;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
*level = PG_LEVEL_2M;
- if (pmd_large(*pmd))
+ if (pmd_large(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
*level = PG_LEVEL_4K;
+
return pte_offset_kernel(pmd, address);
}
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
/* change init_mm */
@@ -175,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
if (!SHARED_KERNEL_PMD) {
struct page *page;
+ address = __pa(address);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pud_t *pud;
@@ -189,18 +252,114 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
#endif
}
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+ struct cpa_data *cpa)
+{
+ unsigned long nextpage_addr, numpages, pmask, psize, flags;
+ pte_t new_pte, old_pte, *tmp;
+ pgprot_t old_prot, new_prot;
+ int level, do_split = 1;
+
+ /*
+ * An Athlon 64 X2 showed hard hangs if we tried to preserve
+ * largepages and changed the PSE entry from RW to RO.
+ *
+ * As AMD CPUs have a long series of erratas in this area,
+ * (and none of the known ones seem to explain this hang),
+ * disable this code until the hang can be debugged:
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return 1;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /*
+ * Check for races, another CPU might have split this page
+ * up already:
+ */
+ tmp = lookup_address(address, &level);
+ if (tmp != kpte)
+ goto out_unlock;
+
+ switch (level) {
+ case PG_LEVEL_2M:
+ psize = PMD_PAGE_SIZE;
+ pmask = PMD_PAGE_MASK;
+ break;
+#ifdef CONFIG_X86_64
+ case PG_LEVEL_1G:
+ psize = PMD_PAGE_SIZE;
+ pmask = PMD_PAGE_MASK;
+ break;
+#endif
+ default:
+ do_split = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Calculate the number of pages, which fit into this large
+ * page starting at address:
+ */
+ nextpage_addr = (address + psize) & pmask;
+ numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+ if (numpages < cpa->numpages)
+ cpa->numpages = numpages;
+
+ /*
+ * We are safe now. Check whether the new pgprot is the same:
+ */
+ old_pte = *kpte;
+ old_prot = new_prot = pte_pgprot(old_pte);
+
+ pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+ new_prot = static_protections(new_prot, address);
+
+ /*
+ * If there are no changes, return. maxpages has been updated
+ * above:
+ */
+ if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+ do_split = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * We need to change the attributes. Check, whether we can
+ * change the large page in one go. We request a split, when
+ * the address is not aligned and the number of pages is
+ * smaller than the number of pages in the large page. Note
+ * that we limited the number of possible pages already to
+ * the number of pages in the large page.
+ */
+ if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+ /*
+ * The address is aligned and the number of pages
+ * covers the full page.
+ */
+ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+ __set_pmd_pte(kpte, address, new_pte);
+ cpa->flushtlb = 1;
+ do_split = 0;
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ return do_split;
+}
+
static int split_large_page(pte_t *kpte, unsigned long address)
{
- pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ unsigned long flags, pfn, pfninc = 1;
gfp_t gfp_flags = GFP_KERNEL;
- unsigned long flags;
- unsigned long addr;
+ unsigned int i, level;
pte_t *pbase, *tmp;
+ pgprot_t ref_prot;
struct page *base;
- unsigned int i, level;
#ifdef CONFIG_DEBUG_PAGEALLOC
- gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
#endif
base = alloc_pages(gfp_flags, 0);
@@ -213,30 +372,41 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* up for us already:
*/
tmp = lookup_address(address, &level);
- if (tmp != kpte) {
- WARN_ON_ONCE(1);
+ if (tmp != kpte)
goto out_unlock;
- }
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif
+ ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+
+#ifdef CONFIG_X86_64
+ if (level == PG_LEVEL_1G) {
+ pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ }
+#endif
- pgprot_val(ref_prot) &= ~_PAGE_NX;
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+ /*
+ * Get the target pfn from the original entry:
+ */
+ pfn = pte_pfn(*kpte);
+ for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+ set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
/*
- * Install the new, split up pagetable. Important detail here:
+ * Install the new, split up pagetable. Important details here:
*
* On Intel the NX bit of all levels must be cleared to make a
* page executable. See section 4.13.2 of Intel 64 and IA-32
* Architectures Software Developer's Manual).
+ *
+ * Mark the entry present. The current mapping might be
+ * set to not present, which we preserved above.
*/
ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+ pgprot_val(ref_prot) |= _PAGE_PRESENT;
__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
base = NULL;
@@ -249,18 +419,12 @@ out_unlock:
return 0;
}
-static int
-__change_page_attr(unsigned long address, unsigned long pfn,
- pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
+ int level, do_split, err;
struct page *kpte_page;
- int level, err = 0;
pte_t *kpte;
-#ifdef CONFIG_X86_32
- BUG_ON(pfn > max_low_pfn);
-#endif
-
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
@@ -271,23 +435,62 @@ repeat:
BUG_ON(PageCompound(kpte_page));
if (level == PG_LEVEL_4K) {
- pgprot_t new_prot = pte_pgprot(*kpte);
pte_t new_pte, old_pte = *kpte;
+ pgprot_t new_prot = pte_pgprot(old_pte);
+
+ if(!pte_val(old_pte)) {
+ printk(KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", address,
+ cpa->vaddr);
+ WARN_ON(1);
+ return -EINVAL;
+ }
- pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
- pgprot_val(new_prot) |= pgprot_val(mask_set);
+ pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+ pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
new_prot = static_protections(new_prot, address);
- new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
- BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+ /*
+ * We need to keep the pfn from the existing PTE,
+ * after all we're only going to change it's attributes
+ * not the memory it points to
+ */
+ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+
+ /*
+ * Do we really change anything ?
+ */
+ if (pte_val(old_pte) != pte_val(new_pte)) {
+ set_pte_atomic(kpte, new_pte);
+ cpa->flushtlb = 1;
+ }
+ cpa->numpages = 1;
+ return 0;
+ }
+
+ /*
+ * Check, whether we can keep the large page intact
+ * and just change the pte:
+ */
+ do_split = try_preserve_large_page(kpte, address, cpa);
+ /*
+ * When the range fits into the existing large page,
+ * return. cp->numpages and cpa->tlbflush have been updated in
+ * try_large_page:
+ */
+ if (do_split <= 0)
+ return do_split;
- set_pte_atomic(kpte, new_pte);
- } else {
- err = split_large_page(kpte, address);
- if (!err)
- goto repeat;
+ /*
+ * We have to split the large page:
+ */
+ err = split_large_page(kpte, address);
+ if (!err) {
+ cpa->flushtlb = 1;
+ goto repeat;
}
+
return err;
}
@@ -304,19 +507,14 @@ repeat:
*
* Modules and drivers should use the set_memory_* APIs instead.
*/
-
-#define HIGH_MAP_START __START_KERNEL_map
-#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE)
-
-static int
-change_page_attr_addr(unsigned long address, pgprot_t mask_set,
- pgprot_t mask_clr)
+static int change_page_attr_addr(struct cpa_data *cpa)
{
- unsigned long phys_addr = __pa(address);
- unsigned long pfn = phys_addr >> PAGE_SHIFT;
int err;
+ unsigned long address = cpa->vaddr;
#ifdef CONFIG_X86_64
+ unsigned long phys_addr = __pa(address);
+
/*
* If we are inside the high mapped kernel range, then we
* fixup the low mapping first. __va() returns the virtual
@@ -326,7 +524,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
address = (unsigned long) __va(phys_addr);
#endif
- err = __change_page_attr(address, pfn, mask_set, mask_clr);
+ err = __change_page_attr(address, cpa);
if (err)
return err;
@@ -339,42 +537,89 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
/*
* Calc the high mapping address. See __phys_addr()
* for the non obvious details.
+ *
+ * Note that NX and other required permissions are
+ * checked in static_protections().
*/
address = phys_addr + HIGH_MAP_START - phys_base;
- /* Make sure the kernel mappings stay executable */
- pgprot_val(mask_clr) |= _PAGE_NX;
/*
* Our high aliases are imprecise, because we check
* everything between 0 and KERNEL_TEXT_SIZE, so do
* not propagate lookup failures back to users:
*/
- __change_page_attr(address, pfn, mask_set, mask_clr);
+ __change_page_attr(address, cpa);
}
#endif
return err;
}
-static int __change_page_attr_set_clr(unsigned long addr, int numpages,
- pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr_set_clr(struct cpa_data *cpa)
{
- unsigned int i;
- int ret;
+ int ret, numpages = cpa->numpages;
- for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
- ret = change_page_attr_addr(addr, mask_set, mask_clr);
+ while (numpages) {
+ /*
+ * Store the remaining nr of pages for the large page
+ * preservation check.
+ */
+ cpa->numpages = numpages;
+ ret = change_page_attr_addr(cpa);
if (ret)
return ret;
- }
+ /*
+ * Adjust the number of pages with the result of the
+ * CPA operation. Either a large page has been
+ * preserved or a single page update happened.
+ */
+ BUG_ON(cpa->numpages > numpages);
+ numpages -= cpa->numpages;
+ cpa->vaddr += cpa->numpages * PAGE_SIZE;
+ }
return 0;
}
+static inline int cache_attr(pgprot_t attr)
+{
+ return pgprot_val(attr) &
+ (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+}
+
static int change_page_attr_set_clr(unsigned long addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr)
{
- int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
- mask_clr);
+ struct cpa_data cpa;
+ int ret, cache;
+
+ /*
+ * Check, if we are requested to change a not supported
+ * feature:
+ */
+ mask_set = canon_pgprot(mask_set);
+ mask_clr = canon_pgprot(mask_clr);
+ if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+ return 0;
+
+ cpa.vaddr = addr;
+ cpa.numpages = numpages;
+ cpa.mask_set = mask_set;
+ cpa.mask_clr = mask_clr;
+ cpa.flushtlb = 0;
+
+ ret = __change_page_attr_set_clr(&cpa);
+
+ /*
+ * Check whether we really changed something:
+ */
+ if (!cpa.flushtlb)
+ return ret;
+
+ /*
+ * No need to flush, when we did not set any of the caching
+ * attributes:
+ */
+ cache = cache_attr(mask_set);
/*
* On success we use clflush, when the CPU supports it to
@@ -383,9 +628,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* wbindv):
*/
if (!ret && cpu_has_clflush)
- cpa_flush_range(addr, numpages);
+ cpa_flush_range(addr, numpages, cache);
else
- cpa_flush_all();
+ cpa_flush_all(cache);
return ret;
}
@@ -489,37 +734,26 @@ int set_pages_rw(struct page *page, int numpages)
return set_memory_rw(addr, numpages);
}
-
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
-static inline int __change_page_attr_set(unsigned long addr, int numpages,
- pgprot_t mask)
-{
- return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-}
-
-static inline int __change_page_attr_clear(unsigned long addr, int numpages,
- pgprot_t mask)
-{
- return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-}
-#endif
-
#ifdef CONFIG_DEBUG_PAGEALLOC
static int __set_pages_p(struct page *page, int numpages)
{
- unsigned long addr = (unsigned long)page_address(page);
+ struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ .numpages = numpages,
+ .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(0)};
- return __change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_PRESENT | _PAGE_RW));
+ return __change_page_attr_set_clr(&cpa);
}
static int __set_pages_np(struct page *page, int numpages)
{
- unsigned long addr = (unsigned long)page_address(page);
+ struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+ .numpages = numpages,
+ .mask_set = __pgprot(0),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
- return __change_page_attr_clear(addr, numpages,
- __pgprot(_PAGE_PRESENT));
+ return __change_page_attr_set_clr(&cpa);
}
void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cb3aa470249..c7db504be1e 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd)
list_del(&page->lru);
}
+#define UNSHARED_PTRS_PER_PGD \
+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
-static void pgd_ctor(void *pgd)
+static void pgd_ctor(void *p)
{
+ pgd_t *pgd = p;
unsigned long flags;
- /* !PAE, no pagetable sharing */
+ /* Clear usermode parts of PGD */
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
- /* must happen under lock */
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
- swapper_pg_dir + USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- USER_PTRS_PER_PGD,
- KERNEL_PGD_PTRS);
- pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-}
-#else /* PTRS_PER_PMD > 1 */
-/* PAE pgd constructor */
-static void pgd_ctor(void *pgd)
-{
- /* PAE, kernel PMD may be shared */
-
- if (SHARED_KERNEL_PMD) {
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ /* If the pgd points to a shared pagetable level (either the
+ ptes in non-PAE, or shared PMD in PAE), then just copy the
+ references from swapper_pg_dir. */
+ if (PAGETABLE_LEVELS == 2 ||
+ (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+ clone_pgd_range(pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
- } else {
- unsigned long flags;
+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ }
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- spin_lock_irqsave(&pgd_lock, flags);
+ /* list required to sync kernel mapping updates */
+ if (!SHARED_KERNEL_PMD)
pgd_list_add(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
-#endif /* PTRS_PER_PMD */
static void pgd_dtor(void *pgd)
{
@@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-#define UNSHARED_PTRS_PER_PGD \
- (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
#ifdef CONFIG_X86_PAE
/*
* Mop up any pmd pages which may still be attached to the pgd.
@@ -387,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
- /* This is called just after the pmd has been detached from
- the pgd, which requires a full tlb flush to be recognized
- by the CPU. Rather than incurring multiple tlb flushes
- while the address space is being pulled down, make the tlb
- gathering machinery do a full flush when we're done. */
- tlb->fullmm = 1;
-
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index f5f165f69e0..55270c26237 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -5,36 +5,62 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/nodemask.h>
+#include <mach_apic.h>
#include "pci.h"
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
+
#define BUS2QUAD(global) (mp_bus_id_to_node[global])
#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+extern void *xquad_portio; /* Where the IO area was mapped */
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+
#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
+static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
+{
+ unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
+ if (xquad_portio)
+ writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
+ else
+ outl(val, 0xCF8);
+}
+
static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
+ void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
- outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+ write_cf8(bus, devfn, reg);
switch (len) {
case 1:
- *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readb(adr + (reg & 3));
+ else
+ *value = inb(0xCFC + (reg & 3));
break;
case 2:
- *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readw(adr + (reg & 2));
+ else
+ *value = inw(0xCFC + (reg & 2));
break;
case 4:
- *value = inl_quad(0xCFC, BUS2QUAD(bus));
+ if (xquad_portio)
+ *value = readl(adr);
+ else
+ *value = inl(0xCFC);
break;
}
@@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
+ void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
- outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+ write_cf8(bus, devfn, reg);
switch (len) {
case 1:
- outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+ if (xquad_portio)
+ writeb(value, adr + (reg & 3));
+ else
+ outb((u8)value, 0xCFC + (reg & 3));
break;
case 2:
- outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+ if (xquad_portio)
+ writew(value, adr + (reg & 2));
+ else
+ outw((u16)value, 0xCFC + (reg & 2));
break;
case 4:
- outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+ if (xquad_portio)
+ writel(value, adr + reg);
+ else
+ outl((u32)value, 0xCFC);
break;
}