diff options
Diffstat (limited to 'arch/x86')
170 files changed, 6135 insertions, 3508 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b2ddfcf0172..21ef9dd3618 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,11 +23,14 @@ config X86 select HAVE_OPROFILE select HAVE_IOREMAP_PROT select HAVE_KPROBES + select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_KRETPROBES select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_GENERIC_DMA_COHERENT if X86_32 + select HAVE_EFFICIENT_UNALIGNED_ACCESS config ARCH_DEFCONFIG string @@ -330,20 +333,6 @@ config X86_BIGSMP endif -config X86_RDC321X - bool "RDC R-321x SoC" - depends on X86_32 - select M486 - select X86_REBOOTFIXUPS - select GENERIC_GPIO - select LEDS_CLASS - select LEDS_GPIO - select NEW_LEDS - help - This option is needed for RDC R-321x system-on-chip, also known - as R-8610-(G). - If you don't have one of these chips, you should say N here. - config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT @@ -367,6 +356,16 @@ config X86_VISWS A kernel compiled for the Visual Workstation will run on general PCs as well. See <file:Documentation/sgi-visws.txt> for details. +config X86_RDC321X + bool "RDC R-321x SoC" + depends on X86_32 + select M486 + select X86_REBOOTFIXUPS + help + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config SCHED_NO_NO_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" @@ -578,35 +577,29 @@ config SWIOTLB config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) + config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP + depends on X86_64 && SMP && BROKEN default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. -if MAXSMP config NR_CPUS - int - default "4096" -endif - -if !MAXSMP -config NR_CPUS - int "Maximum number of CPUs (2-4096)" - range 2 4096 + int "Maximum number of CPUs (2-512)" if !MAXSMP + range 2 512 depends on SMP + default "4096" if MAXSMP default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 default "8" help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 4096 and the + kernel will support. The maximum supported value is 512 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. -endif config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -952,9 +945,9 @@ config NUMA local memory controller of the CPU and add some more NUMA awareness to the kernel. - For i386 this is currently highly experimental and should be only + For 32-bit this is currently highly experimental and should be only used for kernel development. It might also cause boot failures. - For x86_64 this is recommended on all multiprocessor Opteron systems. + For 64-bit this is recommended on all multiprocessor Opteron systems. If the system is EM64T, you should say N unless your system is EM64T NUMA. @@ -997,17 +990,10 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. -if MAXSMP - config NODES_SHIFT - int - default "9" -endif - -if !MAXSMP -config NODES_SHIFT - int "Maximum NUMA Nodes (as a power of 2)" + int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP range 1 9 if X86_64 + default "9" if MAXSMP default "6" if X86_64 default "4" if X86_NUMAQ default "3" @@ -1015,7 +1001,6 @@ config NODES_SHIFT help Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accomodate various tables. -endif config HAVE_ARCH_BOOTMEM_NODE def_bool y @@ -1264,7 +1249,7 @@ config KEXEC strongly in flux, so no good recommendation can be made. config CRASH_DUMP - bool "kernel crash dumps (EXPERIMENTAL)" + bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) help Generate crash dump after being started by kexec. @@ -1277,6 +1262,14 @@ config CRASH_DUMP (CONFIG_RELOCATABLE=y). For more details see Documentation/kdump/kdump.txt +config KEXEC_JUMP + bool "kexec jump (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on KEXEC && HIBERNATION && X86_32 + help + Jump between original kernel and kexeced kernel and invoke + code in physical address mode via KEXEC + config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) default "0x1000000" if X86_NUMAQ @@ -1650,6 +1643,14 @@ config DMAR_FLOPPY_WA workaround will setup a 1:1 mapping for the first 16M to make floppy (an ISA device) work. +config INTR_REMAP + bool "Support for Interrupt Remapping (EXPERIMENTAL)" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + help + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. + source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 468ffc2df0e..1b29d6a8756 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -416,6 +416,68 @@ config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) +menuconfig PROCESSOR_SELECT + default y + bool "Supported processor vendors" if EMBEDDED + help + This lets you choose what x86 vendor support code your kernel + will include. + +config CPU_SUP_INTEL_32 + default y + bool "Support Intel processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Intel processors + +config CPU_SUP_INTEL_64 + default y + bool "Support Intel processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for Intel processors + +config CPU_SUP_CYRIX_32 + default y + bool "Support Cyrix processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Cyrix processors + +config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT + help + This enables extended support for AMD processors + +config CPU_SUP_CENTAUR_32 + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Centaur processors + +config CPU_SUP_CENTAUR_64 + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + depends on 64BIT + help + This enables extended support for Centaur processors + +config CPU_SUP_TRANSMETA_32 + default y + bool "Support Transmeta processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for Transmeta processors + +config CPU_SUP_UMC_32 + default y + bool "Support UMC processors" if PROCESSOR_SELECT + depends on !64BIT + help + This enables extended support for UMC processors + config X86_DS bool "Debug Store support" default y diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 919ce21ea65..f5631da585b 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -118,11 +118,6 @@ mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ -# RDC R-321x subarch support -mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x -mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ -core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/ - # default subarch .h files mflags-y += -Iinclude/asm-x86/mach-default diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index a34b9982c7c..cc0ef13fba7 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -24,10 +24,14 @@ #include <linux/edd.h> #include <asm/boot.h> #include <asm/setup.h> +#include "bitops.h" +#include <asm/cpufeature.h> /* Useful macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + extern struct setup_header hdr; extern struct boot_params boot_params; @@ -242,6 +246,12 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); /* cpu.c, cpucheck.c */ +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; +extern struct cpu_features cpu; int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); int validate_cpu(void); diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index bc5553b496f..aaf5a2131ef 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -16,7 +16,7 @@ */ #undef CONFIG_PARAVIRT #ifdef CONFIG_X86_32 -#define _ASM_DESC_H_ 1 +#define ASM_X86__DESC_H 1 #endif #ifdef CONFIG_X86_64 @@ -182,8 +182,6 @@ static unsigned outcnt; static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); /* * This is set up by the setup-routine at boot-time @@ -196,9 +194,6 @@ extern int input_len; static long bytes_out; -static void *malloc(int size); -static void free(void *where); - static void *memset(void *s, int c, unsigned n); static void *memcpy(void *dest, const void *src, unsigned n); @@ -220,40 +215,6 @@ static int lines, cols; #include "../../../../lib/inflate.c" -static void *malloc(int size) -{ - void *p; - - if (size < 0) - error("Malloc error"); - if (free_mem_ptr <= 0) - error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("Out of memory"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (memptr) *ptr; -} - static void scroll(void) { int i; diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 92d6fd73dc7..75298fe2edc 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -16,9 +16,6 @@ */ #include "boot.h" -#include "bitops.h" -#include <asm/cpufeature.h> - #include "cpustr.h" static char *cpu_name(int level) diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 7804389ee00..4d3ff037201 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -22,21 +22,13 @@ #ifdef _SETUP # include "boot.h" -# include "bitops.h" #endif #include <linux/types.h> -#include <asm/cpufeature.h> #include <asm/processor-flags.h> #include <asm/required-features.h> #include <asm/msr-index.h> -struct cpu_features { - int level; /* Family, or 64 for x86-64 */ - int model; - u32 flags[NCAPINTS]; -}; - -static struct cpu_features cpu; +struct cpu_features cpu; static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; @@ -46,12 +38,12 @@ static const u32 req_flags[NCAPINTS] = { REQUIRED_MASK0, REQUIRED_MASK1, - REQUIRED_MASK2, - REQUIRED_MASK3, + 0, /* REQUIRED_MASK2 not implemented in this file */ + 0, /* REQUIRED_MASK3 not implemented in this file */ REQUIRED_MASK4, - REQUIRED_MASK5, + 0, /* REQUIRED_MASK5 not implemented in this file */ REQUIRED_MASK6, - REQUIRED_MASK7, + 0, /* REQUIRED_MASK7 not implemented in this file */ }; #define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 2296164b54d..197421db1af 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -73,6 +73,11 @@ static void keyboard_set_repeat(void) */ static void query_ist(void) { + /* Some older BIOSes apparently crash on this call, so filter + it from machines too old to have SpeedStep at all. */ + if (cpu.level < 6) + return; + asm("int $0x15" : "=a" (boot_params.ist_info.signature), "=b" (boot_params.ist_info.command), diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 53165c97336..8c3c25f3557 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -13,7 +13,6 @@ */ #include "boot.h" -#include <linux/kernel.h> #define SMAP 0x534d4150 /* ASCII "SMAP" */ diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index bbe76953bae..4589caa3e9d 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -15,7 +15,7 @@ #include <stdio.h> -#include "../kernel/cpu/feature_names.c" +#include "../kernel/cpu/capflags.c" #if NCAPFLAGS > 8 # error "Need to adjust the boot code handling of CPUID strings" diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 4d73f53287b..104275e191a 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,13 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.26-rc1 -# Sun May 4 19:59:02 2008 +# Linux kernel version: 2.6.27-rc4 +# Mon Aug 25 15:04:00 2008 # # CONFIG_64BIT is not set CONFIG_X86_32=y # CONFIG_X86_64 is not set CONFIG_X86=y -CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig" +CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" # CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y @@ -53,6 +53,7 @@ CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y CONFIG_X86_TRAMPOLINE=y CONFIG_KTIME_SCALAR=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # General setup @@ -82,6 +83,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_NS=y # CONFIG_CGROUP_DEVICE is not set CONFIG_CPUSETS=y +CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set @@ -105,7 +107,6 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y @@ -113,6 +114,7 @@ CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +CONFIG_PCSPKR_PLATFORM=y # CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y @@ -132,27 +134,35 @@ CONFIG_MARKERS=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y CONFIG_KPROBES=y +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_KRETPROBES=y +CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_ARCH_TRACEHOOK is not set # CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_HAVE_CLK is not set CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_LBD is not set CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_LSF is not set CONFIG_BLK_DEV_BSG=y +# CONFIG_BLK_DEV_INTEGRITY is not set # # IO Schedulers @@ -176,19 +186,17 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set -# CONFIG_X86_NUMAQ is not set -# CONFIG_X86_SUMMIT is not set -# CONFIG_X86_BIGSMP is not set -# CONFIG_X86_VISWS is not set # CONFIG_X86_GENERICARCH is not set -# CONFIG_X86_ES7000 is not set -# CONFIG_X86_RDC321X is not set # CONFIG_X86_VSMP is not set +# CONFIG_X86_RDC321X is not set CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set +# CONFIG_MEMTEST is not set # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -215,21 +223,19 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y # CONFIG_MPSC is not set CONFIG_MCORE2=y # CONFIG_GENERIC_CPU is not set -# CONFIG_X86_GENERIC is not set +CONFIG_X86_GENERIC=y CONFIG_X86_CPU=y CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_XADD=y CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INVLPG=y CONFIG_X86_BSWAP=y CONFIG_X86_POPAD_OK=y -CONFIG_X86_GOOD_APIC=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y -CONFIG_X86_MINIMUM_CPU_FAMILY=6 +CONFIG_X86_MINIMUM_CPU_FAMILY=4 CONFIG_X86_DEBUGCTLMSR=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y @@ -247,7 +253,7 @@ CONFIG_X86_IO_APIC=y CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set -# CONFIG_X86_REBOOTFIXUPS is not set +CONFIG_X86_REBOOTFIXUPS=y # CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y @@ -256,32 +262,28 @@ CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y -CONFIG_NEED_NODE_MEMMAP_SIZE=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_SELECT_MEMORY_MODEL=y -# CONFIG_FLATMEM_MANUAL is not set +CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_HAVE_MEMORY_PRESENT=y +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPARSEMEM_STATIC=y # CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set - -# -# Memory hotplug is currently incompatible with Software Suspend -# CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y -# CONFIG_HIGHPTE is not set +CONFIG_HIGHPTE=y # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y -# CONFIG_X86_PAT is not set +# CONFIG_MTRR_SANITIZER is not set +CONFIG_X86_PAT=y CONFIG_EFI=y # CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y @@ -293,6 +295,7 @@ CONFIG_HZ=1000 CONFIG_SCHED_HRTICK=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +# CONFIG_KEXEC_JUMP is not set CONFIG_PHYSICAL_START=0x1000000 CONFIG_RELOCATABLE=y CONFIG_PHYSICAL_ALIGN=0x200000 @@ -312,6 +315,7 @@ CONFIG_PM_TRACE_RTC=y CONFIG_PM_SLEEP_SMP=y CONFIG_PM_SLEEP=y CONFIG_SUSPEND=y +# CONFIG_PM_TEST_SUSPEND is not set CONFIG_SUSPEND_FREEZER=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" @@ -337,6 +341,7 @@ CONFIG_ACPI_THERMAL=y CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y +# CONFIG_ACPI_PCI_SLOT is not set CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y @@ -395,8 +400,8 @@ CONFIG_PCI=y # CONFIG_PCI_GOBIOS is not set # CONFIG_PCI_GOMMCONFIG is not set # CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y # CONFIG_PCI_GOOLPC is not set +CONFIG_PCI_GOANY=y CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y @@ -448,10 +453,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_AOUT is not set CONFIG_BINFMT_MISC=y - -# -# Networking -# CONFIG_NET=y # @@ -475,7 +476,10 @@ CONFIG_IP_FIB_HASH=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set CONFIG_IP_MROUTE=y @@ -618,7 +622,6 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set -# CONFIG_NET_SCH_RR is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set @@ -680,28 +683,19 @@ CONFIG_FIB_RULES=y CONFIG_CFG80211=y CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y +CONFIG_WIRELESS_EXT_SYSFS=y CONFIG_MAC80211=y # # Rate control algorithm selection # +CONFIG_MAC80211_RC_PID=y CONFIG_MAC80211_RC_DEFAULT_PID=y -# CONFIG_MAC80211_RC_DEFAULT_NONE is not set - -# -# Selecting 'y' for an algorithm will -# - -# -# build the algorithm into mac80211. -# CONFIG_MAC80211_RC_DEFAULT="pid" -CONFIG_MAC80211_RC_PID=y # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set -# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set -# CONFIG_MAC80211_DEBUG is not set +# CONFIG_MAC80211_DEBUG_MENU is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -717,6 +711,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" # CONFIG_DEBUG_DRIVER is not set CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set @@ -749,6 +745,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 # CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_IBM_ASM is not set # CONFIG_PHANTOM is not set @@ -760,10 +757,12 @@ CONFIG_MISC_DEVICES=y # CONFIG_FUJITSU_LAPTOP is not set # CONFIG_TC1100_WMI is not set # CONFIG_MSI_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set # CONFIG_INTEL_MENLOW is not set # CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -802,12 +801,13 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -# CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set +# CONFIG_SCSI_DH is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y @@ -842,7 +842,7 @@ CONFIG_PATA_AMD=y # CONFIG_PATA_CS5536 is not set # CONFIG_PATA_CYPRESS is not set # CONFIG_PATA_EFAR is not set -# CONFIG_ATA_GENERIC is not set +CONFIG_ATA_GENERIC=y # CONFIG_PATA_HPT366 is not set # CONFIG_PATA_HPT37X is not set # CONFIG_PATA_HPT3X2N is not set @@ -852,7 +852,7 @@ CONFIG_PATA_AMD=y # CONFIG_PATA_JMICRON is not set # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set -# CONFIG_PATA_MPIIX is not set +CONFIG_PATA_MPIIX=y CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_NETCELL is not set # CONFIG_PATA_NINJA32 is not set @@ -871,6 +871,7 @@ CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_SIS is not set # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +CONFIG_PATA_SCH=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_LINEAR is not set @@ -894,13 +895,16 @@ CONFIG_DM_ZERO=y # # IEEE 1394 (FireWire) support # + +# +# Enable only one of the two stacks, unless you know what you are doing +# # CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set @@ -910,7 +914,23 @@ CONFIG_NETDEVICES=y # CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set -# CONFIG_PHYLIB is not set +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_VITESSE_PHY is not set +# CONFIG_SMSC_PHY is not set +# CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set @@ -943,10 +963,10 @@ CONFIG_FORCEDETH=y CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set +CONFIG_NE2K_PCI=y # CONFIG_8139CP is not set CONFIG_8139TOO=y -CONFIG_8139TOO_PIO=y +# CONFIG_8139TOO_PIO is not set # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set # CONFIG_8139_OLD_RX_RESET is not set @@ -961,25 +981,24 @@ CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set -# CONFIG_E1000E is not set -# CONFIG_E1000E_ENABLED is not set +CONFIG_E1000E=y # CONFIG_IP1000 is not set # CONFIG_IGB is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set +CONFIG_R8169=y # CONFIG_SIS190 is not set # CONFIG_SKGE is not set CONFIG_SKY2=y # CONFIG_SKY2_DEBUG is not set # CONFIG_VIA_VELOCITY is not set CONFIG_TIGON3=y -# CONFIG_BNX2 is not set +CONFIG_BNX2=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set @@ -1019,13 +1038,14 @@ CONFIG_WLAN_80211=y # CONFIG_RTL8180 is not set # CONFIG_RTL8187 is not set # CONFIG_ADM8211 is not set +# CONFIG_MAC80211_HWSIM is not set # CONFIG_P54_COMMON is not set CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set -# CONFIG_IWLWIFI is not set +# CONFIG_ATH9K is not set # CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set -# CONFIG_IWL4965 is not set +# CONFIG_IWLAGN is not set # CONFIG_IWL3945 is not set # CONFIG_HOSTAP is not set # CONFIG_B43 is not set @@ -1105,6 +1125,7 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_VSXXXAA is not set CONFIG_INPUT_JOYSTICK=y # CONFIG_JOYSTICK_ANALOG is not set @@ -1139,12 +1160,14 @@ CONFIG_INPUT_TOUCHSCREEN=y # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set # CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set # CONFIG_TOUCHSCREEN_MK712 is not set # CONFIG_TOUCHSCREEN_PENMOUNT is not set # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set # CONFIG_TOUCHSCREEN_TOUCHWIN is not set # CONFIG_TOUCHSCREEN_UCB1400 is not set # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_APANEL is not set @@ -1173,6 +1196,7 @@ CONFIG_SERIO_LIBPS2=y # Character devices # CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y @@ -1223,8 +1247,8 @@ CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y -# CONFIG_HW_RANDOM_INTEL is not set -# CONFIG_HW_RANDOM_AMD is not set +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_AMD=y CONFIG_HW_RANDOM_GEODE=y CONFIG_HW_RANDOM_VIA=y CONFIG_NVRAM=y @@ -1245,7 +1269,6 @@ CONFIG_NVRAM=y # CONFIG_CS5535_GPIO is not set # CONFIG_RAW_DRIVER is not set CONFIG_HPET=y -# CONFIG_HPET_RTC_IRQ is not set # CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set @@ -1254,43 +1277,64 @@ CONFIG_DEVPORT=y CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set +CONFIG_I2C_HELPER_AUTO=y # # I2C Hardware Bus support # + +# +# PC SMBus host controller drivers +# # CONFIG_I2C_ALI1535 is not set # CONFIG_I2C_ALI1563 is not set # CONFIG_I2C_ALI15X3 is not set # CONFIG_I2C_AMD756 is not set # CONFIG_I2C_AMD8111 is not set CONFIG_I2C_I801=y -# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISCH is not set # CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_OCORES is not set -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -# CONFIG_I2C_SIMTEC is not set -# CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_STUB is not set -# CONFIG_I2C_TINY_USB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Graphics adapter I2C/DDC channel drivers +# # CONFIG_I2C_VOODOO3 is not set + +# +# Other I2C/SMBus bus drivers +# # CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set +# CONFIG_SCx200_ACB is not set # # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set +# CONFIG_AT24 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set @@ -1299,6 +1343,8 @@ CONFIG_I2C_I801=y # CONFIG_I2C_DEBUG_BUS is not set # CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +# CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set @@ -1360,8 +1406,10 @@ CONFIG_SSB_POSSIBLE=y # # Multifunction device drivers # +# CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set # # Multimedia devices @@ -1372,6 +1420,7 @@ CONFIG_SSB_POSSIBLE=y # # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set # # Multimedia drivers @@ -1418,7 +1467,6 @@ CONFIG_FB_CFB_IMAGEBLIT=y # CONFIG_FB_SYS_IMAGEBLIT is not set # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_SYS_FOPS is not set -CONFIG_FB_DEFERRED_IO=y # CONFIG_FB_SVGALIB is not set # CONFIG_FB_MACMODES is not set # CONFIG_FB_BACKLIGHT is not set @@ -1463,6 +1511,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_TRIDENT is not set # CONFIG_FB_ARK is not set # CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set # CONFIG_FB_GEODE is not set # CONFIG_FB_VIRTUAL is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y @@ -1470,6 +1519,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_BACKLIGHT_CORGI is not set # CONFIG_BACKLIGHT_PROGEAR is not set +# CONFIG_BACKLIGHT_MBP_NVIDIA is not set # # Display device support @@ -1489,15 +1539,7 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y - -# -# Sound -# CONFIG_SOUND=y - -# -# Advanced Linux Sound Architecture -# CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y @@ -1515,20 +1557,14 @@ CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set CONFIG_SND_VMASTER=y - -# -# Generic devices -# +CONFIG_SND_DRIVERS=y # CONFIG_SND_PCSP is not set # CONFIG_SND_DUMMY is not set # CONFIG_SND_VIRMIDI is not set # CONFIG_SND_MTPAV is not set # CONFIG_SND_SERIAL_U16550 is not set # CONFIG_SND_MPU401 is not set - -# -# PCI devices -# +CONFIG_SND_PCI=y # CONFIG_SND_AD1889 is not set # CONFIG_SND_ALS300 is not set # CONFIG_SND_ALS4000 is not set @@ -1603,36 +1639,14 @@ CONFIG_SND_HDA_GENERIC=y # CONFIG_SND_VIRTUOSO is not set # CONFIG_SND_VX222 is not set # CONFIG_SND_YMFPCI is not set - -# -# USB devices -# +CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_USX2Y is not set # CONFIG_SND_USB_CAIAQ is not set - -# -# PCMCIA devices -# +CONFIG_SND_PCMCIA=y # CONFIG_SND_VXPOCKET is not set # CONFIG_SND_PDAUDIOCF is not set - -# -# System on Chip audio support -# # CONFIG_SND_SOC is not set - -# -# ALSA SoC audio for Freescale SOCs -# - -# -# SoC Audio for the Texas Instruments OMAP -# - -# -# Open Sound System -# # CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y @@ -1668,6 +1682,7 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_DYNAMIC_MINORS is not set CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set +CONFIG_USB_MON=y # # USB Host Controller Drivers @@ -1691,6 +1706,7 @@ CONFIG_USB_UHCI_HCD=y # # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y +# CONFIG_USB_WDM is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -1712,6 +1728,7 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_SIERRA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set CONFIG_USB_LIBUSUAL=y @@ -1720,7 +1737,6 @@ CONFIG_USB_LIBUSUAL=y # # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -CONFIG_USB_MON=y # # USB port drivers @@ -1733,7 +1749,6 @@ CONFIG_USB_MON=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set -# CONFIG_USB_AUERSWALD is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1750,6 +1765,7 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1759,7 +1775,9 @@ CONFIG_LEDS_CLASS=y # # LED drivers # +# CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_CLEVO_MAIL is not set +# CONFIG_LEDS_PCA955X is not set # # LED Triggers @@ -1805,6 +1823,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_PCF8583 is not set # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set # # SPI RTC drivers @@ -1837,11 +1856,13 @@ CONFIG_DMADEVICES=y # Firmware Drivers # # CONFIG_EDD is not set +CONFIG_FIRMWARE_MEMMAP=y CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y -# CONFIG_ISCSI_IBFT_FIND is not set +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_ISCSI_IBFT=y # # File systems @@ -1920,14 +1941,27 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y -# CONFIG_NFS_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y # CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_ACL_SUPPORT=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +# CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -2001,9 +2035,9 @@ CONFIG_NLS_UTF8=y # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y -# CONFIG_PRINTK_TIME is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_PRINTK_TIME=y +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set @@ -2033,6 +2067,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_WRITECOUNT is not set +CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set CONFIG_FRAME_POINTER=y @@ -2043,23 +2078,32 @@ CONFIG_FRAME_POINTER=y # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_HAVE_FTRACE=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +# CONFIG_FTRACE is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SYSPROF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set -# CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set # CONFIG_STRICT_DEVMEM is not set +CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_PER_CPU_MAPS is not set # CONFIG_X86_PTDUMP is not set CONFIG_DEBUG_RODATA=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m # CONFIG_4KSTACKS is not set -CONFIG_X86_FIND_SMP_CONFIG=y -CONFIG_X86_MPPARSE=y CONFIG_DOUBLEFAULT=y +# CONFIG_MMIOTRACE is not set CONFIG_IO_DELAY_TYPE_0X80=0 CONFIG_IO_DELAY_TYPE_0XED=1 CONFIG_IO_DELAY_TYPE_UDELAY=2 @@ -2071,6 +2115,7 @@ CONFIG_IO_DELAY_0X80=y CONFIG_DEFAULT_IO_DELAY_TYPE=0 CONFIG_DEBUG_BOOT_PARAMS=y # CONFIG_CPA_DEBUG is not set +# CONFIG_OPTIMIZE_INLINING is not set # # Security options @@ -2080,7 +2125,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y # CONFIG_SECURITY_NETWORK_XFRM is not set -CONFIG_SECURITY_CAPABILITIES=y CONFIG_SECURITY_FILE_CAPABILITIES=y # CONFIG_SECURITY_ROOTPLUG is not set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 @@ -2141,6 +2185,10 @@ CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set @@ -2151,7 +2199,7 @@ CONFIG_CRYPTO_SHA1=y # Ciphers # CONFIG_CRYPTO_AES=y -# CONFIG_CRYPTO_AES_586 is not set +CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_ARC4=y # CONFIG_CRYPTO_BLOWFISH is not set @@ -2193,6 +2241,7 @@ CONFIG_GENERIC_FIND_FIRST_BIT=y CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set +CONFIG_CRC_T10DIF=y # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index a4045242962..678c8acefe0 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,13 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.26-rc1 -# Sun May 4 19:59:57 2008 +# Linux kernel version: 2.6.27-rc4 +# Mon Aug 25 14:40:46 2008 # CONFIG_64BIT=y # CONFIG_X86_32 is not set CONFIG_X86_64=y CONFIG_X86=y -CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig" +CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" # CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y @@ -53,6 +53,7 @@ CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y CONFIG_X86_TRAMPOLINE=y # CONFIG_KTIME_SCALAR is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # General setup @@ -82,6 +83,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_NS=y # CONFIG_CGROUP_DEVICE is not set CONFIG_CPUSETS=y +CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set @@ -105,7 +107,6 @@ CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y @@ -113,6 +114,7 @@ CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y +CONFIG_PCSPKR_PLATFORM=y # CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y @@ -132,25 +134,33 @@ CONFIG_MARKERS=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y CONFIG_KPROBES=y +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_KRETPROBES=y +CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y +# CONFIG_HAVE_ARCH_TRACEHOOK is not set # CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_HAVE_CLK is not set CONFIG_PROC_PAGE_MONITOR=y +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_BSG=y +# CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y # @@ -175,20 +185,15 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set -# CONFIG_X86_NUMAQ is not set -# CONFIG_X86_SUMMIT is not set -# CONFIG_X86_BIGSMP is not set -# CONFIG_X86_VISWS is not set # CONFIG_X86_GENERICARCH is not set -# CONFIG_X86_ES7000 is not set -# CONFIG_X86_RDC321X is not set # CONFIG_X86_VSMP is not set # CONFIG_PARAVIRT_GUEST is not set -CONFIG_MEMTEST_BOOTPARAM=y -CONFIG_MEMTEST_BOOTPARAM_VALUE=0 +# CONFIG_MEMTEST is not set # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -220,11 +225,12 @@ CONFIG_X86_L1_CACHE_BYTES=64 CONFIG_X86_INTERNODE_CACHE_BYTES=64 CONFIG_X86_CMPXCHG=y CONFIG_X86_L1_CACHE_SHIFT=6 -CONFIG_X86_GOOD_APIC=y +CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y +CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y @@ -234,8 +240,10 @@ CONFIG_DMI=y CONFIG_GART_IOMMU=y CONFIG_CALGARY_IOMMU=y CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y +CONFIG_AMD_IOMMU=y CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y +# CONFIG_MAXSMP is not set CONFIG_NR_CPUS=4 # CONFIG_SCHED_SMT is not set CONFIG_SCHED_MC=y @@ -281,6 +289,7 @@ CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_MTRR=y +# CONFIG_MTRR_SANITIZER is not set # CONFIG_X86_PAT is not set CONFIG_EFI=y CONFIG_SECCOMP=y @@ -313,6 +322,7 @@ CONFIG_PM_TRACE_RTC=y CONFIG_PM_SLEEP_SMP=y CONFIG_PM_SLEEP=y CONFIG_SUSPEND=y +# CONFIG_PM_TEST_SUSPEND is not set CONFIG_SUSPEND_FREEZER=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" @@ -339,6 +349,7 @@ CONFIG_ACPI_NUMA=y CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y +# CONFIG_ACPI_PCI_SLOT is not set CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y @@ -437,10 +448,6 @@ CONFIG_IA32_EMULATION=y CONFIG_COMPAT=y CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y - -# -# Networking -# CONFIG_NET=y # @@ -464,7 +471,10 @@ CONFIG_IP_FIB_HASH=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y -# CONFIG_IP_PNP is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set CONFIG_IP_MROUTE=y @@ -607,7 +617,6 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set -# CONFIG_NET_SCH_RR is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set @@ -669,28 +678,19 @@ CONFIG_FIB_RULES=y CONFIG_CFG80211=y CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y +CONFIG_WIRELESS_EXT_SYSFS=y CONFIG_MAC80211=y # # Rate control algorithm selection # +CONFIG_MAC80211_RC_PID=y CONFIG_MAC80211_RC_DEFAULT_PID=y -# CONFIG_MAC80211_RC_DEFAULT_NONE is not set - -# -# Selecting 'y' for an algorithm will -# - -# -# build the algorithm into mac80211. -# CONFIG_MAC80211_RC_DEFAULT="pid" -CONFIG_MAC80211_RC_PID=y # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set -# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set -# CONFIG_MAC80211_DEBUG is not set +# CONFIG_MAC80211_DEBUG_MENU is not set # CONFIG_IEEE80211 is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -706,6 +706,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" # CONFIG_DEBUG_DRIVER is not set CONFIG_DEBUG_DEVRES=y # CONFIG_SYS_HYPERVISOR is not set @@ -738,6 +740,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 # CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_IBM_ASM is not set # CONFIG_PHANTOM is not set @@ -748,10 +751,14 @@ CONFIG_MISC_DEVICES=y # CONFIG_ASUS_LAPTOP is not set # CONFIG_FUJITSU_LAPTOP is not set # CONFIG_MSI_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set # CONFIG_SONY_LAPTOP is not set # CONFIG_THINKPAD_ACPI is not set # CONFIG_INTEL_MENLOW is not set # CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_SGI_XP is not set +# CONFIG_HP_ILO is not set +# CONFIG_SGI_GRU is not set CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -790,12 +797,13 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -# CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set +# CONFIG_SCSI_DH is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y @@ -857,6 +865,7 @@ CONFIG_PATA_OLDPIIX=y # CONFIG_PATA_SIS is not set # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +CONFIG_PATA_SCH=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_LINEAR is not set @@ -880,13 +889,16 @@ CONFIG_DM_ZERO=y # # IEEE 1394 (FireWire) support # + +# +# Enable only one of the two stacks, unless you know what you are doing +# # CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set @@ -896,7 +908,23 @@ CONFIG_NETDEVICES=y # CONFIG_VETH is not set # CONFIG_NET_SB1000 is not set # CONFIG_ARCNET is not set -# CONFIG_PHYLIB is not set +CONFIG_PHYLIB=y + +# +# MII PHY device drivers +# +# CONFIG_MARVELL_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_VITESSE_PHY is not set +# CONFIG_SMSC_PHY is not set +# CONFIG_BROADCOM_PHY is not set +# CONFIG_ICPLUS_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set @@ -940,16 +968,15 @@ CONFIG_8139TOO_PIO=y # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_NAPI is not set # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_E1000E is not set -# CONFIG_E1000E_ENABLED is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set # CONFIG_NS83820 is not set @@ -965,6 +992,7 @@ CONFIG_TIGON3=y # CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set @@ -1003,13 +1031,14 @@ CONFIG_WLAN_80211=y # CONFIG_RTL8180 is not set # CONFIG_RTL8187 is not set # CONFIG_ADM8211 is not set +# CONFIG_MAC80211_HWSIM is not set # CONFIG_P54_COMMON is not set CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set -# CONFIG_IWLWIFI is not set +# CONFIG_ATH9K is not set # CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set -# CONFIG_IWL4965 is not set +# CONFIG_IWLAGN is not set # CONFIG_IWL3945 is not set # CONFIG_HOSTAP is not set # CONFIG_B43 is not set @@ -1088,6 +1117,7 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_VSXXXAA is not set CONFIG_INPUT_JOYSTICK=y # CONFIG_JOYSTICK_ANALOG is not set @@ -1122,12 +1152,14 @@ CONFIG_INPUT_TOUCHSCREEN=y # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set # CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set # CONFIG_TOUCHSCREEN_MK712 is not set # CONFIG_TOUCHSCREEN_PENMOUNT is not set # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set # CONFIG_TOUCHSCREEN_TOUCHWIN is not set # CONFIG_TOUCHSCREEN_UCB1400 is not set # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_APANEL is not set @@ -1155,6 +1187,7 @@ CONFIG_SERIO_LIBPS2=y # Character devices # CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y @@ -1222,7 +1255,6 @@ CONFIG_NVRAM=y # CONFIG_PC8736x_GPIO is not set # CONFIG_RAW_DRIVER is not set CONFIG_HPET=y -# CONFIG_HPET_RTC_IRQ is not set # CONFIG_HPET_MMAP is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set @@ -1231,42 +1263,63 @@ CONFIG_DEVPORT=y CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set +CONFIG_I2C_HELPER_AUTO=y # # I2C Hardware Bus support # + +# +# PC SMBus host controller drivers +# # CONFIG_I2C_ALI1535 is not set # CONFIG_I2C_ALI1563 is not set # CONFIG_I2C_ALI15X3 is not set # CONFIG_I2C_AMD756 is not set # CONFIG_I2C_AMD8111 is not set CONFIG_I2C_I801=y -# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISCH is not set # CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_OCORES is not set -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -# CONFIG_I2C_SIMTEC is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_STUB is not set -# CONFIG_I2C_TINY_USB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Graphics adapter I2C/DDC channel drivers +# # CONFIG_I2C_VOODOO3 is not set + +# +# Other I2C/SMBus bus drivers +# # CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set # # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set +# CONFIG_AT24 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set @@ -1275,6 +1328,8 @@ CONFIG_I2C_I801=y # CONFIG_I2C_DEBUG_BUS is not set # CONFIG_I2C_DEBUG_CHIP is not set # CONFIG_SPI is not set +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +# CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set @@ -1335,8 +1390,10 @@ CONFIG_SSB_POSSIBLE=y # # Multifunction device drivers # +# CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set # # Multimedia devices @@ -1347,6 +1404,7 @@ CONFIG_SSB_POSSIBLE=y # # CONFIG_VIDEO_DEV is not set # CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set # # Multimedia drivers @@ -1387,7 +1445,6 @@ CONFIG_FB_CFB_IMAGEBLIT=y # CONFIG_FB_SYS_IMAGEBLIT is not set # CONFIG_FB_FOREIGN_ENDIAN is not set # CONFIG_FB_SYS_FOPS is not set -CONFIG_FB_DEFERRED_IO=y # CONFIG_FB_SVGALIB is not set # CONFIG_FB_MACMODES is not set # CONFIG_FB_BACKLIGHT is not set @@ -1430,6 +1487,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_TRIDENT is not set # CONFIG_FB_ARK is not set # CONFIG_FB_PM3 is not set +# CONFIG_FB_CARMINE is not set # CONFIG_FB_GEODE is not set # CONFIG_FB_VIRTUAL is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y @@ -1437,6 +1495,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_BACKLIGHT_CORGI is not set # CONFIG_BACKLIGHT_PROGEAR is not set +# CONFIG_BACKLIGHT_MBP_NVIDIA is not set # # Display device support @@ -1456,15 +1515,7 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y - -# -# Sound -# CONFIG_SOUND=y - -# -# Advanced Linux Sound Architecture -# CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y @@ -1482,20 +1533,14 @@ CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set CONFIG_SND_VMASTER=y - -# -# Generic devices -# +CONFIG_SND_DRIVERS=y # CONFIG_SND_PCSP is not set # CONFIG_SND_DUMMY is not set # CONFIG_SND_VIRMIDI is not set # CONFIG_SND_MTPAV is not set # CONFIG_SND_SERIAL_U16550 is not set # CONFIG_SND_MPU401 is not set - -# -# PCI devices -# +CONFIG_SND_PCI=y # CONFIG_SND_AD1889 is not set # CONFIG_SND_ALS300 is not set # CONFIG_SND_ALS4000 is not set @@ -1568,36 +1613,14 @@ CONFIG_SND_HDA_GENERIC=y # CONFIG_SND_VIRTUOSO is not set # CONFIG_SND_VX222 is not set # CONFIG_SND_YMFPCI is not set - -# -# USB devices -# +CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_USX2Y is not set # CONFIG_SND_USB_CAIAQ is not set - -# -# PCMCIA devices -# +CONFIG_SND_PCMCIA=y # CONFIG_SND_VXPOCKET is not set # CONFIG_SND_PDAUDIOCF is not set - -# -# System on Chip audio support -# # CONFIG_SND_SOC is not set - -# -# ALSA SoC audio for Freescale SOCs -# - -# -# SoC Audio for the Texas Instruments OMAP -# - -# -# Open Sound System -# # CONFIG_SOUND_PRIME is not set CONFIG_HID_SUPPORT=y CONFIG_HID=y @@ -1633,6 +1656,7 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_DYNAMIC_MINORS is not set CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set +CONFIG_USB_MON=y # # USB Host Controller Drivers @@ -1656,6 +1680,7 @@ CONFIG_USB_UHCI_HCD=y # # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y +# CONFIG_USB_WDM is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -1677,6 +1702,7 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_SIERRA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set CONFIG_USB_LIBUSUAL=y @@ -1685,7 +1711,6 @@ CONFIG_USB_LIBUSUAL=y # # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -CONFIG_USB_MON=y # # USB port drivers @@ -1698,7 +1723,6 @@ CONFIG_USB_MON=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set -# CONFIG_USB_AUERSWALD is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1715,6 +1739,7 @@ CONFIG_USB_MON=y # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1724,7 +1749,9 @@ CONFIG_LEDS_CLASS=y # # LED drivers # +# CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_CLEVO_MAIL is not set +# CONFIG_LEDS_PCA955X is not set # # LED Triggers @@ -1770,6 +1797,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_PCF8583 is not set # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set # # SPI RTC drivers @@ -1802,11 +1830,13 @@ CONFIG_DMADEVICES=y # Firmware Drivers # # CONFIG_EDD is not set +CONFIG_FIRMWARE_MEMMAP=y CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y -# CONFIG_ISCSI_IBFT_FIND is not set +CONFIG_ISCSI_IBFT_FIND=y +CONFIG_ISCSI_IBFT=y # # File systems @@ -1886,14 +1916,27 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_CRAMFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y -# CONFIG_NFS_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y # CONFIG_NFSD is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_ACL_SUPPORT=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +# CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set @@ -1967,9 +2010,9 @@ CONFIG_NLS_UTF8=y # Kernel hacking # CONFIG_TRACE_IRQFLAGS_SUPPORT=y -# CONFIG_PRINTK_TIME is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_PRINTK_TIME=y +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set @@ -1998,6 +2041,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_WRITECOUNT is not set +CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set CONFIG_FRAME_POINTER=y @@ -2008,11 +2052,20 @@ CONFIG_FRAME_POINTER=y # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_HAVE_FTRACE=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +# CONFIG_FTRACE is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SYSPROF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set -# CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set # CONFIG_STRICT_DEVMEM is not set +CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y @@ -2023,8 +2076,8 @@ CONFIG_DEBUG_RODATA=y # CONFIG_DIRECT_GBPAGES is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m -CONFIG_X86_MPPARSE=y # CONFIG_IOMMU_DEBUG is not set +# CONFIG_MMIOTRACE is not set CONFIG_IO_DELAY_TYPE_0X80=0 CONFIG_IO_DELAY_TYPE_0XED=1 CONFIG_IO_DELAY_TYPE_UDELAY=2 @@ -2036,6 +2089,7 @@ CONFIG_IO_DELAY_0X80=y CONFIG_DEFAULT_IO_DELAY_TYPE=0 CONFIG_DEBUG_BOOT_PARAMS=y # CONFIG_CPA_DEBUG is not set +# CONFIG_OPTIMIZE_INLINING is not set # # Security options @@ -2045,7 +2099,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y # CONFIG_SECURITY_NETWORK_XFRM is not set -CONFIG_SECURITY_CAPABILITIES=y CONFIG_SECURITY_FILE_CAPABILITIES=y # CONFIG_SECURITY_ROOTPLUG is not set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 @@ -2106,6 +2159,10 @@ CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y # CONFIG_CRYPTO_SHA256 is not set # CONFIG_CRYPTO_SHA512 is not set @@ -2155,6 +2212,7 @@ CONFIG_GENERIC_FIND_FIRST_BIT=y CONFIG_GENERIC_FIND_NEXT_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set +CONFIG_CRC_T10DIF=y # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 58cccb6483b..a0e1dbe67dc 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -441,12 +441,6 @@ beyond_if: regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; set_fs(USER_DS); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } return 0; } diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 20af4c79579..f25a1012400 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -179,9 +179,10 @@ struct sigframe u32 pretcode; int sig; struct sigcontext_ia32 sc; - struct _fpstate_ia32 fpstate; + struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */ unsigned int extramask[_COMPAT_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe @@ -192,8 +193,8 @@ struct rt_sigframe u32 puc; compat_siginfo_t info; struct ucontext_ia32 uc; - struct _fpstate_ia32 fpstate; char retcode[8]; + /* fp state follows here */ }; #define COPY(x) { \ @@ -215,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, unsigned int *peax) { unsigned int tmpflags, gs, oldgs, err = 0; - struct _fpstate_ia32 __user *buf; + void __user *buf; u32 tmp; /* Always make any pending restarted system calls return -EINTR */ @@ -259,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, err |= __get_user(tmp, &sc->fpstate); buf = compat_ptr(tmp); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387_ia32(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate_ia32(buf); err |= __get_user(tmp, &sc->ax); *peax = tmp; return err; - -badframe: - return 1; } asmlinkage long sys32_sigreturn(struct pt_regs *regs) @@ -350,7 +337,7 @@ badframe: */ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, - struct _fpstate_ia32 __user *fpstate, + void __user *fpstate, struct pt_regs *regs, unsigned int mask) { int tmp, err = 0; @@ -381,7 +368,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, err |= __put_user((u32)regs->flags, &sc->flags); err |= __put_user((u32)regs->sp, &sc->sp_at_signal); - tmp = save_i387_ia32(fpstate); + tmp = save_i387_xstate_ia32(fpstate); if (tmp < 0) err = -EFAULT; else { @@ -402,7 +389,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, * Determine which stack to use.. */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size) + size_t frame_size, + void **fpstate) { unsigned long sp; @@ -421,6 +409,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ka->sa.sa_restorer) sp = (unsigned long) ka->sa.sa_restorer; + if (used_math()) { + sp = sp - sig_xstate_ia32_size; + *fpstate = (struct _fpstate_ia32 *) sp; + } + sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ @@ -434,6 +427,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, struct sigframe __user *frame; void __user *restorer; int err = 0; + void __user *fpstate = NULL; /* copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -448,7 +442,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -457,8 +451,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, if (err) goto give_sigsegv; - err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, - set->sig[0]); + err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -522,6 +515,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct rt_sigframe __user *frame; void __user *restorer; int err = 0; + void __user *fpstate = NULL; /* __copy_to_user optimizes that into a single 8 byte store */ static const struct { @@ -537,7 +531,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -550,13 +544,16 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index e4bd1793a5e..ffc1bb4fed7 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -201,7 +201,7 @@ sysexit_from_sys_call: movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ .endm - .macro auditsys_exit exit + .macro auditsys_exit exit,ebpsave=RBP testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) jnz int_ret_from_sys_call TRACE_IRQS_ON @@ -214,7 +214,7 @@ sysexit_from_sys_call: call audit_syscall_exit GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ - movl RBP-ARGOFFSET(%rsp),%ebp /* reload user register value */ + movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF @@ -347,7 +347,7 @@ cstar_auditsys: jmp cstar_dispatch sysretl_audit: - auditsys_exit sysretl_from_sys_call + auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ #endif cstar_tracesys: diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651fc8ec..c9be69fedb7 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o -obj-y += i387.o +obj-y += i387.o xsave.o obj-y += ptrace.o obj-y += ds.o obj-$(CONFIG_X86_32) += tls.o @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o @@ -104,6 +105,8 @@ obj-$(CONFIG_OLPC) += olpc.o ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o obj-y += bios_uv.o + obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fa88a1d7129..27ef365e757 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) +{ + if (!strcmp(mcfg->header.oem_id, "SGI")) + acpi_mcfg_64bit_base_addr = TRUE; + + return 0; +} + int __init acpi_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; @@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) } memcpy(pci_mmcfg_config, &mcfg[1], config_size); + + acpi_mcfg_oem_check(mcfg); + for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && + !acpi_mcfg_64bit_base_addr) { printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); kfree(pci_mmcfg_config); @@ -761,7 +775,7 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -1337,7 +1351,9 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 9220cf46aa1..c2502eb9aa8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); int retval; unsigned int eax, ebx, ecx, edx; unsigned int edx_part; @@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ saved_mask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, new_mask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) return -1; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fa2161d5003..426e5d91b63 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags; /* address in low memory of the wakeup routine. */ static unsigned long acpi_realmode; -#ifdef CONFIG_64BIT +#if defined(CONFIG_SMP) && defined(CONFIG_64BIT) static char temp_stack[10240]; #endif @@ -86,7 +86,7 @@ int acpi_save_state_mem(void) #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4(); + header->pmode_cr4 = read_cr4_safe(); header->realmode_flags = acpi_realmode_flags; header->real_magic = 0x12345678; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b55..65a0c1b4869 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c25210e6ac8..69b4d060b21 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -29,9 +29,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EXIT_LOOP_COUNT 10000000 static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -68,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u8 *target; tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - target = (iommu->cmd_buf + tail); + target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); @@ -104,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret; + int ret, ready = 0; + unsigned status = 0; struct iommu_cmd cmd; - volatile u64 ready = 0; - unsigned long ready_phys = virt_to_phys(&ready); unsigned long i = 0; memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - cmd.data[1] = upper_32_bits(ready_phys); - cmd.data[2] = 1; /* value written to 'ready' */ + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); iommu->need_sync = 0; @@ -125,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu) while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; - cpu_relax(); + /* wait for the bit to become one */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; } + /* set bit back to zero */ + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); @@ -164,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, address &= PAGE_MASK; CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); cmd.data[1] |= domid; - cmd.data[2] = LOW_U32(address); + cmd.data[2] = lower_32_bits(address); cmd.data[3] = upper_32_bits(address); if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; @@ -185,7 +185,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = to_pages(address, size); + unsigned pages = iommu_num_pages(address, size); address &= PAGE_MASK; @@ -557,8 +557,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = to_pages(iommu->exclusion_start, - iommu->exclusion_length); + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -667,7 +667,7 @@ static int get_device_resources(struct device *dev, _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); /* device not translated by any IOMMU in the system? */ - if (_bdf >= amd_iommu_last_bdf) { + if (_bdf > amd_iommu_last_bdf) { *iommu = NULL; *domain = NULL; *bdf = 0xffff; @@ -767,7 +767,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned int pages; int i; - pages = to_pages(paddr, size); + pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; address = dma_ops_alloc_addresses(dev, dma_dom, pages); @@ -802,7 +802,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = to_pages(dma_addr, size); + pages = iommu_num_pages(dma_addr, size); dma_addr &= PAGE_MASK; start = dma_addr; @@ -1085,7 +1085,7 @@ void prealloc_protection_domains(void) while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { devid = (dev->bus->number << 8) | dev->devfn; - if (devid >= amd_iommu_last_bdf) + if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; if (domain_for_device(devid)) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c9d8ff2eb13..a69cc0f5204 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) set_device_exclusion_range(m->devid, m); break; case ACPI_IVMD_TYPE_ALL: - for (i = 0; i < amd_iommu_last_bdf; ++i) + for (i = 0; i <= amd_iommu_last_bdf; ++i) set_device_exclusion_range(i, m); break; case ACPI_IVMD_TYPE_RANGE: @@ -801,6 +801,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table) } /* + * Init the device table to not allow DMA access for devices and + * suppress all page faults + */ +static void init_device_table(void) +{ + u16 devid; + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + set_dev_entry_bit(devid, DEV_ENTRY_VALID); + set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); + } +} + +/* * This function finally enables all IOMMUs found in the system after * they have been initialized */ @@ -931,10 +946,13 @@ int __init amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto free; + /* init the device table */ + init_device_table(); + /* * let all alias entries point to itself */ - for (i = 0; i < amd_iommu_last_bdf; ++i) + for (i = 0; i <= amd_iommu_last_bdf; ++i) amd_iommu_alias_table[i] = i; /* @@ -954,15 +972,15 @@ int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; - ret = amd_iommu_init_dma_ops(); + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; - ret = sysdev_class_register(&amd_iommu_sysdev_class); + ret = sysdev_register(&device_amd_iommu); if (ret) goto free; - ret = sysdev_register(&device_amd_iommu); + ret = amd_iommu_init_dma_ops(); if (ret) goto free; diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c89835837..58427210505 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,13 +145,18 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -167,16 +172,48 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v = APIC_DM_NMI; + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; - /* Level triggered for 82489DX */ + /* Level triggered for 82489DX (32bit mode) */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } @@ -193,9 +230,13 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v = apic_read(APIC_LVR); + unsigned int v; - /* 82489DXs do not report # of LVT entries. */ + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } @@ -1205,7 +1246,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } @@ -1242,7 +1283,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); @@ -1321,54 +1362,6 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } -#ifdef CONFIG_SMP -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); -} -#endif - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - /** * connect_bsp_APIC - attach the APIC to the interrupt system */ @@ -1454,8 +1447,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; @@ -1482,12 +1473,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); @@ -1720,15 +1705,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static int __init apic_set_verbosity(char *str) +static int __init apic_set_verbosity(char *arg) { - if (strcmp("debug", str) == 0) + if (!arg) + return -EINVAL; + + if (strcmp(arg, "debug") == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) + else if (strcmp(arg, "verbose") == 0) apic_verbosity = APIC_VERBOSE; - return 1; + + return 0; } -__setup("apic=", apic_set_verbosity); +early_param("apic", apic_set_verbosity); static int __init lapic_insert_resource(void) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7e..1a6011855af 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -27,6 +27,7 @@ #include <linux/clockchips.h> #include <linux/acpi_pmtmr.h> #include <linux/module.h> +#include <linux/dmar.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -39,6 +40,7 @@ #include <asm/proto.h> #include <asm/timex.h> #include <asm/apic.h> +#include <asm/i8259.h> #include <mach_ipi.h> #include <mach_apic.h> @@ -46,6 +48,11 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int disable_x2apic; +int x2apic; + +/* x2apic enabled before OS handover */ +int x2apic_preenabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; @@ -90,7 +97,6 @@ static unsigned long apic_phys; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ @@ -119,13 +125,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -141,6 +147,69 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return (icr1 | ((u64)icr2 << 32)); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; + +EXPORT_SYMBOL_GPL(apic_ops); + +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -150,6 +219,11 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } @@ -158,11 +232,14 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v, maxlvt; + unsigned int v; v = apic_read(APIC_LVR); - maxlvt = GET_APIC_MAXLVT(v); - return maxlvt; + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* @@ -630,10 +707,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = read_apic_id(); + reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = read_apic_id(); + reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -834,6 +911,125 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -873,7 +1069,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /** @@ -881,6 +1077,11 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -900,7 +1101,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /* @@ -919,6 +1120,9 @@ int __init APIC_init_uniprocessor(void) return -1; } + enable_IR_x2apic(); + setup_apic_routing(); + verify_local_APIC(); connect_bsp_APIC(); @@ -1062,12 +1266,6 @@ void __cpuinit generic_processor_info(int apicid, int version) return; } - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - num_processors++; cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); @@ -1100,6 +1298,11 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} + /* * Power management */ @@ -1136,7 +1339,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = read_apic_id(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); @@ -1171,10 +1374,14 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (!x2apic) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } else + enable_x2apic(); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1314,6 +1521,15 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); + + /* * APIC command line parameters */ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index aa89387006f..505543a75a5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -22,7 +22,7 @@ #define __NO_STUBS 1 #undef __SYSCALL -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #define __SYSCALL(nr, sym) [nr] = 1, static char syscalls[] = { #include <asm/unistd.h> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ee76eaad300..510d1bcb058 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,22 +3,31 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o feature_names.o - -obj-$(CONFIG_X86_32) += common.o bugs.o -obj-$(CONFIG_X86_64) += common_64.o bugs_64.o -obj-$(CONFIG_X86_32) += amd.o -obj-$(CONFIG_X86_64) += amd_64.o -obj-$(CONFIG_X86_32) += cyrix.o -obj-$(CONFIG_X86_32) += centaur.o -obj-$(CONFIG_X86_64) += centaur_64.o -obj-$(CONFIG_X86_32) += transmeta.o -obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_64) += intel_64.o -obj-$(CONFIG_X86_32) += umc.o +obj-y += proc.o capflags.o powerflags.o common.o + +obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o +obj-$(CONFIG_X86_64) += bugs_64.o + +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_AMD) += amd.o +obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o +obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o +obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o +obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ + +cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h + +targets += capflags.c +$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE + $(call if_changed,mkcapflags) diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a607..0d9c993aa93 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,6 +7,8 @@ #include <asm/pat.h> #include <asm/processor.h> +#include <mach_apic.h> + struct cpuid_bit { u16 feature; u8 reg; @@ -48,6 +50,92 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) } } +/* leaf 0xb SMT level */ +#define SMT_LEVEL 0 + +/* leaf 0xb sub-leaf types */ +#define INVALID_TYPE 0 +#define SMT_TYPE 1 +#define CORE_TYPE 2 + +#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) +#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) +#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (c->cpuid_level < 0xb) + return; + + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + /* + * check if the cpuid leaf 0xb is actually implemented. + */ + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + return; + + set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + + /* + * initial apic id, which also represents 32-bit extended x2apic id. + */ + c->initial_apicid = edx; + + /* + * Populate HT related information from sub-leaf level 0. + */ + core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + + sub_index = 1; + do { + cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + + /* + * Check for the Core type in the implemented sub leaves. + */ + if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + break; + } + + sub_index++; + } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + +#ifdef CONFIG_X86_32 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + & core_select_mask; + c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); +#else + c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; + c->phys_proc_id = phys_pkg_id(core_plus_mask_width); +#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); + + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + return; +#endif +} + #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { @@ -56,9 +144,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) return; - break; + + pat_disable("PAT WC disabled due to known CPU erratum."); + return; + case X86_VENDOR_AMD: case X86_VENDOR_CENTAUR: case X86_VENDOR_TRANSMETA: diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc303..32e73520adf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,13 +1,22 @@ #include <linux/init.h> #include <linux/bitops.h> #include <linux/mm.h> + #include <asm/io.h> #include <asm/processor.h> #include <asm/apic.h> +#ifdef CONFIG_X86_64 +# include <asm/numa_64.h> +# include <asm/mmconfig.h> +# include <asm/cacheflush.h> +#endif + #include <mach_apic.h> + #include "cpu.h" +#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -24,21 +33,273 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) { - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); } } -static void __cpuinit init_amd(struct cpuinfo_x86 *c) + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits; + + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; +#endif +} + +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; +#endif +} + +static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + early_init_amd_mc(c); + + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else + /* Set MTRR capability flag if appropriate */ + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif +} + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ #ifdef CONFIG_SMP unsigned long long value; @@ -49,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 15) { + if (c->x86 == 0xf) { rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K7_HWCR, value); @@ -59,213 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) early_init_amd(c); /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); - r = get_model_name(c); +#ifdef CONFIG_X86_64 + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + u32 level; - switch (c->x86) { - case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; - case 5: - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - break; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; - case 6: /* An Athlon/Duron */ - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_cpu_cap(c, X86_FEATURE_K8); + case 4: + init_amd_k5(c); break; - case 6: - set_cpu_cap(c, X86_FEATURE_K7); + case 5: + init_amd_k6(c); + break; + case 6: /* An Athlon/Duron */ + init_amd_k7(c); break; } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); +#endif + + /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - display_cacheinfo(c); - - if (cpuid_eax(0x80000000) >= 0x80000008) - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (!c->x86_model_id[0]) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distinguish the cores. - */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + display_cacheinfo(c); - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1); - c->phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) { + amd_detect_cmp(c); + srat_detect_node(c); } + +#ifdef CONFIG_X86_32 + detect_ht(c); #endif - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; else num_cache_leaves = 3; } - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has_xmm2) + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } + +#ifdef CONFIG_X86_64 + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ @@ -278,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { @@ -294,9 +463,11 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, - .c_size_cache = amd_size_cache, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index d1692b2a41f..00000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null @@ -1,224 +0,0 @@ -#include <linux/init.h> -#include <linux/mm.h> - -#include <asm/numa_64.h> -#include <asm/mmconfig.h> -#include <asm/cacheflush.h> - -#include <mach_apic.h> - -#include "cpu.h" - -int force_mwait __cpuinitdata; - -#ifdef CONFIG_NUMA -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; - -#ifdef CONFIG_NUMA - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; - -#endif -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - unsigned level; - -#ifdef CONFIG_SMP - unsigned long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K8_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); - } -#endif - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_cpu_cap(c, 0*32+31); - - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - level = get_model_name(c); - if (!level) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) - amd_detect_cmp(c); - - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -} - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, - .c_early_init = early_init_amd, - .c_init = init_amd, -}; - -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); - diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9b58a806e8..c8e315f1aa8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -50,6 +50,8 @@ static double __initdata y = 3145727.0; */ static void __init check_fpu(void) { + s32 fdiv_bug; + if (!boot_cpu_data.hard_math) { #ifndef CONFIG_MATH_EMULATION printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); @@ -74,8 +76,10 @@ static void __init check_fpu(void) "fistpl %0\n\t" "fwait\n\t" "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) + : "=m" (*&fdiv_bug) : "m" (*&x), "m" (*&y)); + + boot_cpu_data.fdiv_bug = fdiv_bug; if (boot_cpu_data.fdiv_bug) printk("Hmm, FPU with FDIV bug.\n"); } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a5..89bfdd9cacc 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); - get_model_name(c); display_cacheinfo(c); } @@ -314,6 +313,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,8 +471,10 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 1d181c40e2e..0e5cf17a3c8 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -16,6 +16,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { + early_init_centaur(c); + if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); @@ -29,7 +31,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c new file mode 100644 index 00000000000..2056ccf572c --- /dev/null +++ b/arch/x86/kernel/cpu/cmpxchg.c @@ -0,0 +1,72 @@ +/* + * cmpxchg*() fallbacks for CPU not supporting these instructions + */ + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/module.h> + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa3..9a57106c199 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,27 +1,62 @@ #include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> #include <linux/string.h> +#include <linux/bootmem.h> +#include <linux/bitops.h> +#include <linux/module.h> +#include <linux/kgdb.h> +#include <linux/topology.h> #include <linux/delay.h> #include <linux/smp.h> -#include <linux/module.h> #include <linux/percpu.h> -#include <linux/bootmem.h> -#include <asm/processor.h> #include <asm/i387.h> #include <asm/msr.h> #include <asm/io.h> +#include <asm/linkage.h> #include <asm/mmu_context.h> #include <asm/mtrr.h> #include <asm/mce.h> #include <asm/pat.h> +#include <asm/asm.h> +#include <asm/numa.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> #include <mach_apic.h> +#include <asm/genapic.h> #endif +#include <asm/pda.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/desc.h> +#include <asm/atomic.h> +#include <asm/proto.h> +#include <asm/sections.h> +#include <asm/setup.h> + #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + +#ifdef CONFIG_X86_64 +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +#else +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -55,17 +90,150 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - +#ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); +#else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} +#endif + +/* + * Naming convention should be: <Name> [(<Codename>)] + * This table only is used unless init_<vendor>() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif +} + +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else /* Not much we can do here... */ /* Check if at least it has cpuid */ if (c->cpuid_level == -1) { @@ -75,28 +243,22 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) else if (c->x86 == 3) strcpy(c->x86_model_id, "386"); } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); -int __cpuinit get_model_name(struct cpuinfo_x86 *c) +static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; + if (c->extended_cpuid_level < 0x80000004) + return; v = (unsigned int *) c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); @@ -115,30 +277,34 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } - - return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ecx, edx, l2size; + unsigned int n, dummy, ebx, ecx, edx, l2size; - n = cpuid_eax(0x80000000); + n = c->extended_cpuid_level; if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24)+(edx>>24); + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ return; - ecx = cpuid_ecx(0x80000006); + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; +#ifdef CONFIG_X86_64 + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -149,116 +315,106 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (l2size == 0) return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); + l2size, ecx & 0xFF); } -/* - * Naming convention should be: <Name> [(<Codename>)] - * This table only is used unless init_<vendor>() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +void __cpuinit detect_ht(struct cpuinfo_x86 *c) { - struct cpu_model_info *info; +#ifdef CONFIG_X86_HT + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; - if (c->x86_model >= 16) - return NULL; /* Range check */ + if (!cpu_has(c, X86_FEATURE_HT)) + return; - if (!this_cpu) - return NULL; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; - info = this_cpu->c_models; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; + cpuid(1, &eax, &ebx, &ecx, &edx); + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 + c->phys_proc_id = phys_pkg_id(index_msb); +#else + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +#endif + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + +#ifdef CONFIG_X86_64 + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); +#else + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); +#endif } - return NULL; /* Not found */ -} +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +#endif +} -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -void __init cpu_detect(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, @@ -267,50 +423,68 @@ void __init cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[4]); c->x86 = 4; + /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; + c->x86_model += ((tfms >> 16) & 0xf) << 4; if (cap0 & (1<<19)) { - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; } } } -static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; - unsigned int ebx; + u32 ebx; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (have_cpuid_p()) { - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); } + } +#ifdef CONFIG_X86_64 + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); } -} + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +#endif + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + +} /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -320,109 +494,126 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) * WARNING: this function is only called on the BP. Don't add code here * that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; +#ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; +#else c->x86_clflush_size = 32; +#endif + c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) return; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + c->extended_cpuid_level = 0; + cpu_detect(c); - get_cpu_vendor(c, 1); + get_cpu_vendor(c); + + get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); - early_get_cap(c); + validate_pat_support(c); } -static void __cpuinit generic_identify(struct cpuinfo_x86 *c) +void __init early_cpu_init(void) { - u32 tfms, xlvl; - unsigned int ebx; - - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - c->initial_apicid = (ebx >> 24) & 0xFF; -#ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; -#else - c->apicid = c->initial_apicid; -#endif - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; + + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); } - - init_scattered_cpuid_features(c); } + early_identify_cpu(&boot_cpu_data); } -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); } } -static int __init x86_serial_nr_setup(char *s) +static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); + if (!have_cpuid_p()) + return; + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id(c->initial_apicid, 0); +# else + c->apicid = c->initial_apicid; +# endif +#endif + +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; +#endif + } + get_model_name(c); /* Default name */ + init_scattered_cpuid_features(c); + detect_nopl(c); +} /* * This does the hard work of actually picking apart the CPU stuff... @@ -434,12 +625,18 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; + c->x86_coreid_bits = 0; +#ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; +#endif + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -458,6 +655,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); +#ifdef CONFIG_X86_64 + c->apicid = phys_pkg_id(0); +#endif + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -491,6 +692,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } +#ifdef CONFIG_X86_64 + detect_ht(c); +#endif + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -499,7 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for (i = 0 ; i < NCAPINTS ; i++) + for (i = 0; i < NCAPINTS; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -507,72 +712,79 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) for (i = 0; i < NCAPINTS; i++) c->x86_capability[i] &= ~cleared_cpu_caps[i]; +#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); +#endif select_idle_routine(c); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + numa_add_cpu(smp_processor_id()); +#endif } void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 enable_sep_cpu(); +#endif mtrr_ap_init(); } -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; +struct msr_range { + unsigned min; + unsigned max; +}; - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); } + } +} - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); + get_option(&arg, &num); - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } + if (num > 0) + show_msr = num; + return 1; } -#endif +__setup("show_msr=", setup_show_msr); static __init int setup_noclflush(char *arg) { @@ -591,17 +803,25 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) vendor = c->x86_vendor_id; if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); + printk(KERN_CONT "%s ", vendor); - if (!c->x86_model_id[0]) - printk("%d86", c->x86); + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); else - printk("%s", c->x86_model_id); + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); + printk(KERN_CONT " stepping %02x\n", c->x86_mask); else - printk("\n"); + printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) @@ -617,19 +837,89 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -void __init early_cpu_init(void) +#ifdef CONFIG_X86_64 +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +void __cpuinit pda_init(int cpu) { - struct cpu_vendor_dev *cvdev; + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; + } else { + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] __page_aligned_bss; - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; +extern asmlinkage void ignore_sysret(void); - early_cpu_detect(); - validate_pat_support(&boot_cpu_data); +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +#else + /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -637,25 +927,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) regs->fs = __KERNEL_PERCPU; return regs; } - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} +#endif /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} + +#else + void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); @@ -709,9 +1110,20 @@ void __cpuinit cpu_init(void) /* * Force FPU initialization: */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); } #ifdef CONFIG_HOTPLUG_CPU @@ -725,3 +1137,5 @@ void __cpuinit cpu_uninit(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } #endif + +#endif diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index dd6e3f15017..00000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null @@ -1,670 +0,0 @@ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/bootmem.h> -#include <linux/bitops.h> -#include <linux/module.h> -#include <linux/kgdb.h> -#include <linux/topology.h> -#include <linux/delay.h> -#include <linux/smp.h> -#include <linux/percpu.h> -#include <asm/i387.h> -#include <asm/msr.h> -#include <asm/io.h> -#include <asm/linkage.h> -#include <asm/mmu_context.h> -#include <asm/mtrr.h> -#include <asm/mce.h> -#include <asm/pat.h> -#include <asm/numa.h> -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/mpspec.h> -#include <asm/apic.h> -#include <mach_apic.h> -#endif -#include <asm/pda.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/desc.h> -#include <asm/atomic.h> -#include <asm/proto.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/genapic.h> - -#include "cpu.h" - -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -} - -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; - -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ - display_cacheinfo(c); -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", -}; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - - if (c->extended_cpuid_level < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - return 1; -} - - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ebx, ecx, edx; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " - "D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; - } - - if (n >= 0x80000006) { - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - ecx = cpuid_ecx(0x80000006); - c->x86_cache_size = ecx >> 16; - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - c->x86_cache_size, ecx & 0xFF); - } -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(index_msb); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); - } -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } - -#endif -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } - } - } - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - c->x86_vendor = X86_VENDOR_UNKNOWN; -} - -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); - -void __init early_cpu_init(void) -{ - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_support_print(); - early_identify_cpu(&boot_cpu_data); -} - -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c); - - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - __u32 misc; - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], - &c->x86_capability[0]); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; -#endif - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ - } - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } - - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); - - validate_pat_support(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - early_identify_cpu(c); - - init_scattered_cpuid_features(c); - - c->apicid = phys_pkg_id(0); - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - detect_ht(c); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - -#ifdef CONFIG_X86_MCE - mcheck_init(c); -#endif - select_idle_routine(c); - -#ifdef CONFIG_NUMA - numa_add_cpu(smp_processor_id()); -#endif - -} - -void __cpuinit identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); - mtrr_ap_init(); -} - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; - -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -void pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); - - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } - - pda->irqstackptr += IRQSTACKSIZE-64; -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init. - */ -void __cpuinit cpu_init(void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - - /* - * set up and load the per-CPU TSS - */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4d894e8565f..de4094a3921 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -21,23 +21,16 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); + int c_x86_vendor; }; -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; +#define cpu_dev_register(cpu_devX) \ + static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + __attribute__((__section__(".x86_cpu_dev.init"))) = \ + &cpu_devX; -struct cpu_vendor_dev { - int vendor; - struct cpu_dev *cpu_dev; -}; - -#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ - static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ - __attribute__((__section__(".x86cpuvendor.init"))) = \ - { cpu_vendor_id, cpu_dev } - -extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; +extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; -extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index cb7a5715596..efae3b22a0f 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -235,9 +235,9 @@ config X86_LONGHAUL If in doubt, say N. config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + tristate "VIA C7 Enhanced PowerSaver" select CPU_FREQ_TABLE - depends on X86_32 && EXPERIMENTAL + depends on X86_32 help This adds the CPUFreq driver for VIA C7 processors. diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ff2fff56f0a..dd097b83583 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { cpumask_t saved_mask = current->cpus_allowed; - cpumask_of_cpu_ptr_declare(cpu_mask); unsigned int i; for_each_cpu_mask_nr(i, cmd->mask) { - cpumask_of_cpu_ptr_next(cpu_mask, i); - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); do_drv_write(cmd); } @@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu) } aperf_cur, mperf_cur; cpumask_t saved_mask; - cpumask_of_cpu_ptr(cpu_mask, cpu); unsigned int perf_percent; unsigned int retval; saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (get_cpu() != cpu) { /* We were not able to run on requested processor */ put_cpu(); @@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - cpumask_of_cpu_ptr(cpu_mask, cpu); struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) } cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpu_mask), data); + freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 94619c22f56..e4a4bf870e9 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -44,7 +44,7 @@ struct s_elan_multiplier { * It is important that the frequencies * are listed in ascending order here! */ -struct s_elan_multiplier elan_multiplier[] = { +static struct s_elan_multiplier elan_multiplier[] = { {1000, 0x02, 0x18}, {2000, 0x02, 0x10}, {4000, 0x02, 0x08}, diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 53c7b693697..84bb395038d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid) return 800 + (fid * 100); } - /* Return a frequency in KHz, given an input fid */ static u32 find_khz_freq_from_fid(u32 fid) { @@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p return data[pstate].frequency; } - /* Return the vco fid for an input fid * * Each "low" fid has corresponding "high" fid, and you can get to "low" fids @@ -166,7 +164,6 @@ static void fidvid_msr_init(void) wrmsr(MSR_FIDVID_CTL, lo, hi); } - /* write the new fid value along with the other control fields to the msr */ static int write_new_fid(struct powernow_k8_data *data, u32 fid) { @@ -479,12 +476,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi static int check_supported_cpu(unsigned int cpu) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, cpu); u32 eax, ebx, ecx, edx; unsigned int rc = 0; oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); @@ -1017,7 +1013,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask; - cpumask_of_cpu_ptr(cpu_mask, pol->cpu); struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1032,7 +1027,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, cpu_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1107,7 +1102,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; - cpumask_of_cpu_ptr_declare(newmask); int rc; if (!cpu_online(pol->cpu)) @@ -1159,8 +1153,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr_next(newmask, pol->cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1182,7 +1175,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed_ptr(current, &oldmask); if (cpu_family == CPU_HW_PSTATE) - pol->cpus = *newmask; + pol->cpus = cpumask_of_cpu(pol->cpu); else pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); @@ -1248,7 +1241,6 @@ static unsigned int powernowk8_get (unsigned int cpu) { struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int khz = 0; unsigned int first; @@ -1258,7 +1250,7 @@ static unsigned int powernowk8_get (unsigned int cpu) if (!data) return -EINVAL; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index ca2ac13b7af..15e13c01cc3 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu) unsigned l, h; unsigned clock_freq; cpumask_t saved_mask; - cpumask_of_cpu_ptr(new_mask, cpu); saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) return 0; @@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy, * Best effort undo.. */ - if (!cpus_empty(*covered_cpus)) { - cpumask_of_cpu_ptr_declare(new_mask); - + if (!cpus_empty(*covered_cpus)) for_each_cpu_mask_nr(j, *covered_cpus) { - cpumask_of_cpu_ptr_next(new_mask, j); - set_cpus_allowed_ptr(current, new_mask); + set_cpus_allowed_ptr(current, + &cpumask_of_cpu(j)); wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); } - } tmp = freqs.new; freqs.new = freqs.old; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2f3728dc24f..191f7263c61 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) static unsigned int speedstep_get(unsigned int cpu) { - cpumask_of_cpu_ptr(newmask, cpu); - return _speedstep_get(newmask); + return _speedstep_get(&cpumask_of_cpu(cpu)); } /** diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06..ffd0f5ed071 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -116,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -127,28 +132,11 @@ static void __cpuinit set_cx86_memwb(void) printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); -} - -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); } /* @@ -162,23 +150,40 @@ static void __cpuinit geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); set_cx86_reorder(); - set_cx86_inc(); local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -286,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -296,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); - get_model_name(c); /* get CPU marketing name */ return; } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; @@ -309,7 +313,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); } else { c->coma_bug = 1; /* 6x86MX, it has the bug. */ } @@ -424,7 +428,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ local_irq_restore(flags); } @@ -434,16 +438,19 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, + .c_x86_vendor = X86_VENDOR_CYRIX, }; -cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); +cpu_dev_register(cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, + .c_x86_vendor = X86_VENDOR_NSC, }; -cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); +cpu_dev_register(nsc_cpu_dev); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c deleted file mode 100644 index e43ad4ad4cb..00000000000 --- a/arch/x86/kernel/cpu/feature_names.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Strings for the various x86 capability flags. - * - * This file must not contain any executable code. - */ - -#include <asm/cpufeature.h> - -/* - * These flag bits must match the definitions in <asm/cpufeature.h>. - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu/<cpu_nr>/cpuid instead. - */ -const char * const x86_cap_flags[NCAPINTS*32] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", - "cr8_legacy", "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "sse5", - "skinit", "wdt", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -}; - -const char *const x86_power_flags[32] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ -}; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f113ef4595f..a66989586a8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -23,13 +23,6 @@ #include <mach_apic.h> #endif -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ @@ -83,7 +76,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -183,9 +176,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - c->x86_max_cores = num_cpu_cores(c); + detect_extended_topology(c); - detect_ht(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); + detect_ht(c); + } /* Work around errata */ Intel_errata_workarounds(c); @@ -210,9 +210,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) { + if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); - } if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { @@ -311,73 +310,10 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_early_init = early_init_intel, .c_init = init_intel, .c_size_cache = intel_size_cache, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); - -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -#ifndef CONFIG_X86_CMPXCHG64 -unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) -{ - u64 prev; - unsigned long flags; - - /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u64 *)ptr; - if (prev == old) - *(u64 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_486_u64); -#endif +cpu_dev_register(intel_cpu_dev); /* arch_initcall(intel_cpu_init); */ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f..aef4f282631 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -54,6 +54,8 @@ static void __cpuinit srat_detect_node(void) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + early_init_intel(c); + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -69,18 +71,19 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) ds_init_intel(c); + } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } @@ -90,6 +93,7 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_ident = { "GenuineIntel" }, .c_early_init = early_init_intel, .c_init = init_intel, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 650d40f7912..3f46afbb1cf 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,8 +1,8 @@ /* - * Routines to indentify caches on Intel CPU. + * Routines to indentify caches on Intel CPU. * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ @@ -13,6 +13,7 @@ #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/sched.h> +#include <linux/pci.h> #include <asm/processor.h> #include <asm/smp.h> @@ -130,9 +131,18 @@ struct _cpuid4_info { union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned long size; + unsigned long can_disable; cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ }; +#ifdef CONFIG_PCI +static struct pci_device_id k8_nb_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} +}; +#endif + unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = { static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +static void __cpuinit +amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) { unsigned dummy; unsigned line_size, lines_per_tag, assoc, size_in_kb; @@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static void __cpuinit +amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +{ + if (index < 3) + return; + this_leaf->can_disable = 1; +} + +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; unsigned edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { amd_cpuid4(index, &eax, &ebx, &ecx); - else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (boot_cpu_data.x86 >= 0x10) + amd_check_l3_disable(index, this_leaf); + } else { + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + } + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; this_leaf->ebx = ebx; this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } @@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) /* pointer to _cpuid4_info array (for each cache leaf) */ static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) @@ -490,7 +514,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) this_leaf = CPUID4_INFO_IDX(cpu, index); for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); + sibling_leaf = CPUID4_INFO_IDX(sibling, index); cpu_clear(cpu, sibling_leaf->shared_cpu_map); } } @@ -516,7 +540,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) unsigned long j; int retval; cpumask_t oldmask; - cpumask_of_cpu_ptr(newmask, cpu); if (num_cache_leaves == 0) return -ENOENT; @@ -527,7 +550,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) return -ENOMEM; oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, newmask); + retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); if (retval) goto out; @@ -573,7 +596,7 @@ struct _index_kobject { /* pointer to array of kobjects for cpuX/cache/indexY */ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -638,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { } } +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +#ifdef CONFIG_PCI +static struct pci_dev *get_k8_northbridge(int node) +{ + struct pci_dev *dev = NULL; + int i; + + for (i = 0; i <= node; i++) { + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_id[0], dev)); + if (!dev) + break; + } + return dev; +} +#else +static struct pci_dev *get_k8_northbridge(int node) +{ + return NULL; +} +#endif + +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +{ + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = NULL; + ssize_t ret = 0; + int i; + + if (!this_leaf->can_disable) + return sprintf(buf, "Feature not enabled\n"); + + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + + for (i = 0; i < 2; i++) { + unsigned int reg; + + pci_read_config_dword(dev, 0x1BC + i * 4, ®); + + ret += sprintf(buf, "%sEntry: %d\n", buf, i); + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", + buf, + reg & 0x80000000 ? "Disabled" : "Allowed", + reg & 0x40000000 ? "Disabled" : "Allowed"); + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", + buf, (reg & 0x30000) >> 16, reg & 0xfff); + } + return ret; +} + +static ssize_t +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, + size_t count) +{ + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + struct pci_dev *dev = NULL; + unsigned int ret, index, val; + + if (!this_leaf->can_disable) + return 0; + + if (strlen(buf) > 15) + return -EINVAL; + + ret = sscanf(buf, "%x %x", &index, &val); + if (ret != 2) + return -EINVAL; + if (index > 1) + return -EINVAL; + + val |= 0xc0000000; + dev = get_k8_northbridge(node); + if (!dev) { + printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + return -EINVAL; + } + + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); + wbinvd(); + pci_write_config_dword(dev, 0x1BC + index * 4, val); + + return 1; +} + struct _cache_attr { struct attribute attr; ssize_t (*show)(struct _cpuid4_info *, char *); @@ -658,6 +774,8 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); +static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); + static struct attribute * default_attrs[] = { &type.attr, &level.attr, @@ -668,12 +786,10 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, + &cache_disable.attr, NULL }; -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) { struct _cache_attr *fattr = to_attr(attr); @@ -683,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) ret = fattr->show ? fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), buf) : - 0; + 0; return ret; } static ssize_t store(struct kobject * kobj, struct attribute * attr, const char * buf, size_t count) { - return 0; + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->store ? + fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf, count) : + 0; + return ret; } static struct sysfs_ops sysfs_ops = { diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 65a339678ec..4b031a4ac85 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = { }; DEFINE_PER_CPU(struct sys_device, device_mce); +void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ @@ -859,7 +860,7 @@ error: return err; } -static void mce_remove_device(unsigned int cpu) +static __cpuinit void mce_remove_device(unsigned int cpu) { int i; @@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: mce_create_device(cpu); + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); mce_remove_device(cpu); break; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 88736cadbaa..5eb390a4b2e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: + kobject_del(b->kobj); kobject_put(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; @@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) } /* get notified when a cpu comes on/off */ -static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, + unsigned int cpu) { - /* cpu was unsigned int to begin with */ - unsigned int cpu = (unsigned long)hcpu; - if (cpu >= NR_CPUS) - goto out; + return; switch (action) { case CPU_ONLINE: @@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, default: break; } - out: - return NOTIFY_OK; } -static struct notifier_block threshold_cpu_notifier __cpuinitdata = { - .notifier_call = threshold_cpu_callback, -}; - static __init int threshold_init_device(void) { unsigned lcpu = 0; @@ -684,7 +676,7 @@ static __init int threshold_init_device(void) if (err) return err; } - register_hotcpu_notifier(&threshold_cpu_notifier); + threshold_cpu_callback = amd_64_threshold_cpu_callback; return 0; } diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl new file mode 100644 index 00000000000..dfea390e160 --- /dev/null +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -0,0 +1,32 @@ +#!/usr/bin/perl +# +# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h +# + +($in, $out) = @ARGV; + +open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; +open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; + +print OUT "#include <asm/cpufeature.h>\n\n"; +print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; + +while (defined($line = <IN>)) { + if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { + $macro = $1; + $feature = $2; + $tail = $3; + if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { + $feature = $1; + } + + if ($feature ne '') { + printf OUT "\t%-32s = \"%s\",\n", + "[$macro]", "\L$feature"; + } + } +} +print OUT "};\n"; + +close(IN); +close(OUT); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eac..cb7d3b6a80e 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; + unsigned int tmp, hi; rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { @@ -392,8 +393,23 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | tmp; + /* Expand tmp with high bits to all 1s*/ + hi = fls(tmp); + if (hi > 0) { + tmp |= ~((1<<(hi - 1)) - 1); + + if (tmp != mask_lo) { + static int once = 1; + + if (once) { + printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + once = 0; + } + mask_lo = tmp; + } + } /* This works correctly if size is a power of two, i.e. a contiguous range. */ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8fa..58ac5d3d436 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -729,7 +729,7 @@ struct var_mtrr_range_state { mtrr_type type; }; -struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; static int __init @@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all */ if (!highest_pfn) { - if (!kvm_para_available()) { - printk(KERN_WARNING + WARN(!kvm_para_available(), KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); - WARN_ON(1); - } return 0; } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index de7439f82b9..05cc22dbd4f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz) perfctr_msr = MSR_P4_IQ_PERFCTR1; evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR1; - cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + + /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ + if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) + cccr_val = P4_CCCR_OVF_PMI0; + else + cccr_val = P4_CCCR_OVF_PMI1; + cccr_val |= P4_CCCR_ESCR_SELECT(4); } evntsel = P4_ESCR_EVENT_SELECT(0x3F) diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c new file mode 100644 index 00000000000..5abbea297e0 --- /dev/null +++ b/arch/x86/kernel/cpu/powerflags.c @@ -0,0 +1,20 @@ +/* + * Strings for the various x86 power flags + * + * This file must not contain any executable code. + */ + +#include <asm/cpufeature.h> + +const char *const x86_power_flags[32] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* tsc invariant mapped to constant_tsc */ + /* nothing */ +}; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index b911a2c61b8..738e03244f9 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; - get_model_name(c); /* Same as AMD/Cyrix */ display_cacheinfo(c); /* Print CMS and CPU revision */ @@ -102,6 +101,7 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, .c_identify = transmeta_identify, + .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); +cpu_dev_register(transmeta_cpu_dev); diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1fc90989d7..e777f79e096 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, + .c_x86_vendor = X86_VENDOR_UMC, }; -cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); +cpu_dev_register(umc_cpu_dev); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 14b11b3be31..8e9cd6a8ec1 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -89,6 +89,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, struct cpuid_regs cmd; int cpu = iminor(file->f_path.dentry->d_inode); u64 pos = *ppos; + ssize_t bytes = 0; + int err = 0; if (count % 16) return -EINVAL; /* Invalid chunk size */ @@ -96,14 +98,19 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, for (; count; count -= 16) { cmd.eax = pos; cmd.ecx = pos >> 32; - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); - if (copy_to_user(tmp, &cmd, 16)) - return -EFAULT; + err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); + if (err) + break; + if (copy_to_user(tmp, &cmd, 16)) { + err = -EFAULT; + break; + } tmp += 16; + bytes += 16; *ppos = ++pos; } - return tmp - buf; + return bytes ? bytes : err; } static int cpuid_open(struct inode *inode, struct file *file) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7b..e24d1bc47b4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -148,6 +148,9 @@ void __init e820_print_map(char *who) case E820_NVS: printk(KERN_CONT "(ACPI NVS)\n"); break; + case E820_UNUSABLE: + printk("(unusable)\n"); + break; default: printk(KERN_CONT "type %u\n", e820.map[i].type); break; @@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type) case E820_RAM: return "System RAM"; case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; default: return "reserved"; } } @@ -1267,6 +1271,7 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ +static struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; @@ -1274,6 +1279,7 @@ void __init e820_reserve_resources(void) u64 end; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + e820_res = res; for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT @@ -1287,7 +1293,14 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + insert_resource(&iomem_resource, res); res++; } @@ -1299,6 +1312,19 @@ void __init e820_reserve_resources(void) } } +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (!res->parent && res->end) + reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); + res++; + } +} + char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13..5cab48ee61a 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) * directory. If I have PAE, I just need to duplicate one entry in * page directory. */ - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { efi_bak_pg_dir_pointer[0].pgd = @@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - cr4 = read_cr4(); + cr4 = read_cr4_safe(); if (cr4 & X86_CR4_PAE) { swapper_pg_dir[pgd_index(0)].pgd = diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d43960..cf3a0b2d005 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64) ENTRY(ret_from_fork) CFI_DEFAULT_STACK push kernel_eflags(%rip) - CFI_ADJUST_CFA_OFFSET 4 + CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET -8 call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/kernel/es7000_32.c index 50189af14b8..849e5cd485b 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/kernel/es7000_32.c @@ -39,10 +39,93 @@ #include <asm/nmi.h> #include <asm/smp.h> #include <asm/apicdef.h> -#include "es7000.h" #include <mach_mpparse.h> /* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +/* * ES7000 Globals */ @@ -72,7 +155,7 @@ es7000_rename_gsi(int ioapic, int gsi) base += nr_ioapic_registers[i]; } - if (!ioapic && (gsi < 16)) + if (!ioapic && (gsi < 16)) gsi += base; return gsi; } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd21..6c9bfc9e1e9 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,86 +16,63 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/hardirq.h> +#include <linux/dmar.h> #include <asm/smp.h> #include <asm/ipi.h> #include <asm/genapic.h> -#ifdef CONFIG_ACPI -#include <acpi/acpi_bus.h> -#endif - -DEFINE_PER_CPU(int, x2apic_extra_bits); +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2xpic_uv_x; +extern struct genapic apic_x2apic_phys; +extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; -static enum uv_system_type uv_system_type; +static struct genapic *apic_probe[] __initdata = { + &apic_x2apic_uv_x, + &apic_x2apic_phys, + &apic_x2apic_cluster, + &apic_physflat, + NULL, +}; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init setup_apic_routing(void) { - if (uv_system_type == UV_NON_UNIQUE_APIC) - genapic = &apic_x2apic_uv_x; - else -#ifdef CONFIG_ACPI - /* - * Quirk: some x86_64 machines can only use physical APIC mode - * regardless of how many processors are present (x86_64 ES7000 - * is an example). - */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - genapic = &apic_physflat; - else -#endif - - if (max_physical_apicid < 8) - genapic = &apic_flat; - else - genapic = &apic_physflat; + if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (!intr_remapping_enabled) + genapic = &apic_flat; + } - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + if (genapic == &apic_flat) { + if (max_physical_apicid >= 8) + genapic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + } } /* Same for both flat and physical. */ -void send_IPI_self(int vector) +void apic_send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (!strcmp(oem_id, "SGI")) { - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) - uv_system_type = UV_NON_UNIQUE_APIC; + int i; + + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { + genapic = apic_probe[i]; + printk(KERN_INFO "Setting APIC routing to %s.\n", + genapic->name); + return 1; + } } return 0; } - -unsigned int read_apic_id(void) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID); - if (uv_system_type >= UV_X2APIC) - id |= __get_cpu_var(x2apic_extra_bits); - return id; -} - -enum uv_system_type get_uv_system_type(void) -{ - return uv_system_type; -} - -int is_uv_system(void) -{ - return uv_system_type != UV_NONE; -} diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 786548a62d3..9eca5ba7a6b 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -15,9 +15,20 @@ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/init.h> +#include <linux/hardirq.h> #include <asm/smp.h> #include <asm/ipi.h> #include <asm/genapic.h> +#include <mach_apicdef.h> + +#ifdef CONFIG_ACPI +#include <acpi/acpi_bus.h> +#endif + +static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 1; +} static cpumask_t flat_target_cpus(void) { @@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = (((x)>>24) & 0xFFu); + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xFFu)<<24); + return x; +} + +static unsigned int read_xapic_id(void) +{ + unsigned int id; + + id = get_apic_id(apic_read(APIC_ID)); + return id; +} + static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_xapic_id(), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb) struct genapic apic_flat = { .name = "flat", + .acpi_madt_oem_check = flat_acpi_madt_oem_check, .int_delivery_mode = dest_LowestPrio, .int_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, @@ -121,8 +157,12 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; /* @@ -130,6 +170,21 @@ struct genapic apic_flat = { * We cannot use logical delivery in this case because the mask * overflows, so use physical mode. */ +static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ +#ifdef CONFIG_ACPI + /* + * Quirk: some x86_64 machines can only use physical APIC mode + * regardless of how many processors are present (x86_64 ES7000 + * is an example). + */ + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + return 1; +#endif + + return 0; +} static cpumask_t physflat_target_cpus(void) { @@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) struct genapic apic_physflat = { .name = "physical flat", + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, @@ -185,6 +241,10 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 00000000000..e4bf2cc0d74 --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -0,0 +1,159 @@ +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/dmar.h> + +#include <asm/smp.h> +#include <asm/ipi.h> +#include <asm/genapic.h> + +DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); + +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic) + return 1; + + return 0; +} + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +/* + * for now each logical cpu is in its own vector allocation domain. + */ +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +/* + * for now, we send the IPI's one by one in the cpumask. + * TBD: Based on the cpu mask, we can send the IPI's to the cluster group + * at once. We have 16 cpu's in a cluster. This will minimize IPI register + * writes. + */ +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return current_cpu_data.initial_apicid >> index_msb; +} + +static void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +static void init_x2apic_ldr(void) +{ + int cpu = smp_processor_id(); + + per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); + return; +} + +struct genapic apic_x2apic_cluster = { + .name = "cluster x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), +}; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c new file mode 100644 index 00000000000..8f1343df262 --- /dev/null +++ b/arch/x86/kernel/genx2apic_phys.c @@ -0,0 +1,154 @@ +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/dmar.h> + +#include <asm/smp.h> +#include <asm/ipi.h> +#include <asm/genapic.h> + +static int x2apic_phys; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + +static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (cpu_has_x2apic && x2apic_phys) + return 1; + + return 0; +} + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_target_cpus(void) +{ + return cpumask_of_cpu(0); +} + +static cpumask_t x2apic_vector_allocation_domain(int cpu) +{ + cpumask_t domain = CPU_MASK_NONE; + cpu_set(cpu, domain); + return domain; +} + +static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, + unsigned int dest) +{ + unsigned long cfg; + + cfg = __prepare_ICR(0, vector, dest); + + /* + * send the IPI. + */ + x2apic_icr_write(cfg, apicid); +} + +static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned long flags; + unsigned long query_cpu; + + local_irq_save(flags); + for_each_cpu_mask(query_cpu, mask) { + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + x2apic_send_IPI_mask(mask, vector); +} + +static void x2apic_send_IPI_all(int vector) +{ + x2apic_send_IPI_mask(cpu_online_map, vector); +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + cpu = first_cpu(cpumask); + if ((unsigned)cpu < NR_CPUS) + return per_cpu(x86_cpu_to_apicid, cpu); + else + return BAD_APICID; +} + +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = x; + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = id; + return x; +} + +static unsigned int phys_pkg_id(int index_msb) +{ + return current_cpu_data.initial_apicid >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +void init_x2apic_ldr(void) +{ + return; +} + +struct genapic apic_x2apic_phys = { + .name = "physical x2apic", + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .target_cpus = x2apic_target_cpus, + .vector_allocation_domain = x2apic_vector_allocation_domain, + .apic_id_registered = x2apic_apic_id_registered, + .init_apic_ldr = init_x2apic_ldr, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_self = x2apic_send_IPI_self, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .phys_pkg_id = phys_pkg_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), +}; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2cfcbded888..ae2ffc8a400 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -12,12 +12,12 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/string.h> -#include <linux/kernel.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/bootmem.h> #include <linux/module.h> +#include <linux/hardirq.h> #include <asm/smp.h> #include <asm/ipi.h> #include <asm/genapic.h> @@ -26,6 +26,36 @@ #include <asm/uv/uv_hub.h> #include <asm/uv/bios.h> +DEFINE_PER_CPU(int, x2apic_extra_bits); + +static enum uv_system_type uv_system_type; + +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strcmp(oem_id, "SGI")) { + if (!strcmp(oem_table_id, "UVL")) + uv_system_type = UV_LEGACY_APIC; + else if (!strcmp(oem_table_id, "UVX")) + uv_system_type = UV_X2APIC; + else if (!strcmp(oem_table_id, "UVH")) { + uv_system_type = UV_NON_UNIQUE_APIC; + return 1; + } + } + return 0; +} + +enum uv_system_type get_uv_system_type(void) +{ + return uv_system_type; +} + +int is_uv_system(void) +{ + return uv_system_type != UV_NONE; +} +EXPORT_SYMBOL_GPL(is_uv_system); + DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); @@ -123,6 +153,10 @@ static int uv_apic_id_registered(void) return 1; } +static void uv_init_apic_ldr(void) +{ +} + static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) { int cpu; @@ -138,9 +172,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) return BAD_APICID; } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + WARN_ON(preemptible() && num_online_cpus() > 1); + id = x | __get_cpu_var(x2apic_extra_bits); + + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + /* maskout x2apic_extra_bits ? */ + x = id; + return x; +} + +static unsigned int uv_read_apic_id(void) +{ + + return get_apic_id(apic_read(APIC_ID)); +} + static unsigned int phys_pkg_id(int index_msb) { - return GET_APIC_ID(read_apic_id()) >> index_msb; + return uv_read_apic_id() >> index_msb; } #ifdef ZZZ /* Needs x2apic patch */ @@ -152,17 +211,22 @@ static void uv_send_IPI_self(int vector) struct genapic apic_x2apic_uv_x = { .name = "UV large system", + .acpi_madt_oem_check = uv_acpi_madt_oem_check, .int_delivery_mode = dest_Fixed, .int_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ .apic_id_registered = uv_apic_id_registered, + .init_apic_ldr = uv_init_apic_ldr, .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, /* ZZZ.send_IPI_self = uv_send_IPI_self, */ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFFFFFFFu), }; static __cpuinit void set_x2apic_extra_bits(int pnode) @@ -222,7 +286,7 @@ static __init void map_low_mmrs(void) enum map_type {map_wb, map_uc}; -static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) { unsigned long bytes, paddr; @@ -293,7 +357,9 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static __init void uv_system_init(void) +static bool uv_system_inited; + +void __init uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; union uvh_node_id_u node_id; @@ -383,6 +449,7 @@ static __init void uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); + uv_system_inited = true; } /* @@ -391,11 +458,12 @@ static __init void uv_system_init(void) */ void __cpuinit uv_cpu_init(void) { - if (!uv_node_to_blade) - uv_system_init(); + BUG_ON(!uv_system_inited); uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; if (get_uv_system_type() == UV_NON_UNIQUE_APIC) set_x2apic_extra_bits(uv_hub_info->pnode); } + + diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1b318e903bf..9bfc4d72fb2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f67e93441ca..a7010c3a377 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP 1: #endif /* CONFIG_SMP */ jmp *(initial_code) -.align 4 -ENTRY(initial_code) - .long i386_start_kernel /* * We depend on ET to be correct. This checks for 287/387. @@ -601,6 +598,11 @@ ignore_int: #endif iret +.section .cpuinit.data,"wa" +.align 4 +ENTRY(initial_code) + .long i386_start_kernel + .section .text /* * Real beginning of normal "text" segment diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a1334..59fd3b6b130 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -359,6 +359,7 @@ static int hpet_clocksource_register(void) int __init hpet_enable(void) { unsigned long id; + int i; if (!is_hpet_capable()) return 0; @@ -369,6 +370,29 @@ int __init hpet_enable(void) * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); + + /* + * AMD SB700 based systems with spread spectrum enabled use a + * SMM based HPET emulation to provide proper frequency + * setting. The SMM code is initialized with the first HPET + * register access and takes some time to complete. During + * this time the config register reads 0xffffffff. We check + * for max. 1000 loops whether the config register reads a non + * 0xffffffff value to make sure that HPET is up and running + * before we go further. A counting loop is safe, as the HPET + * access takes thousands of CPU cycles. On non SB700 based + * machines this check is only done once and has no side + * effects. + */ + for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { + if (i == 1000) { + printk(KERN_WARNING + "HPET config register value = 0xFFFFFFFF. " + "Disabling HPET\n"); + goto out_nohpet; + } + } + if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index eb9ddd8efb8..45723f1fe19 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -21,9 +21,12 @@ # include <asm/sigcontext32.h> # include <asm/user32.h> #else -# define save_i387_ia32 save_i387 -# define restore_i387_ia32 restore_i387 +# define save_i387_xstate_ia32 save_i387_xstate +# define restore_i387_xstate_ia32 restore_i387_xstate # define _fpstate_ia32 _fpstate +# define _xstate_ia32 _xstate +# define sig_xstate_ia32_size sig_xstate_size +# define fx_sw_reserved_ia32 fx_sw_reserved # define user_i387_ia32_struct user_i387_struct # define user32_fxsr_struct user_fxsr_struct #endif @@ -36,6 +39,7 @@ static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; +unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; void __cpuinit mxcsr_feature_mask_init(void) @@ -61,6 +65,11 @@ void __init init_thread_xstate(void) return; } + if (cpu_has_xsave) { + xsave_cntxt_init(); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -83,9 +92,19 @@ void __cpuinit fpu_init(void) write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + xsave_init(); + mxcsr_feature_mask_init(); /* clean state in init */ - current_thread_info()->status = 0; + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; clear_used_math(); } #endif /* CONFIG_X86_64 */ @@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, */ target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + return ret; } @@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!ret) convert_to_fxsr(target, &env); + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } @@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; - unlazy_fpu(tsk); fp->status = fp->swd; if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; @@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err = 0; - unlazy_fpu(tsk); - convert_from_fxsr(&env, tsk); if (__copy_to_user(buf, &env, sizeof(env))) return -1; @@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], fx, - sizeof(struct i387_fxsave_struct))) + if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) return -1; return 1; } -int save_i387_ia32(struct _fpstate_ia32 __user *buf) +static int save_i387_xsave(void __user *buf) +{ + struct _fpstate_ia32 __user *fx = buf; + int err = 0; + + if (save_i387_fxsave(fx) < 0) + return -1; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, + sizeof(struct _fpx_sw_bytes)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_ia32_size + - FP_XSTATE_MAGIC2_SIZE)); + if (err) + return -1; + + return 1; +} + +int save_i387_xstate_ia32(void __user *buf) { + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; + struct task_struct *tsk = current; + if (!used_math()) return 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) + return -EACCES; /* * This will cause a "finit" to be triggered by the next * attempted FPU operation by the 'current' process. @@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf) if (!HAVE_HWFP) { return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) ? -1 : 1; + NULL, fp) ? -1 : 1; } + unlazy_fpu(tsk); + + if (cpu_has_xsave) + return save_i387_xsave(fp); if (cpu_has_fxsr) - return save_i387_fxsave(buf); + return save_i387_fxsave(fp); else - return save_i387_fsave(buf); + return save_i387_fsave(fp); } static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) @@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) sizeof(struct i387_fsave_struct)); } -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) +static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, + unsigned int size) { struct task_struct *tsk = current; struct user_i387_ia32_struct env; int err; err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct)); + size); /* mxcsr reserved bits must be masked to zero for security reasons */ tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) @@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) return 0; } -int restore_i387_ia32(struct _fpstate_ia32 __user *buf) +static int restore_i387_xsave(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + struct _fpstate_ia32 __user *fx_user = + ((struct _fpstate_ia32 __user *) buf); + struct i387_fxsave_struct __user *fx = + (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; + struct xsave_hdr_struct *xsave_hdr = + ¤t->thread.xstate->xsave.xsave_hdr; + u64 mask; + int err; + + if (check_for_xstate(fx, buf, &fx_sw_user)) + goto fx_only; + + mask = fx_sw_user.xstate_bv; + + err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); + + xsave_hdr->xstate_bv &= pcntxt_mask; + /* + * These bits must be zero. + */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory layout + * and enabled by the OS. + */ + mask = ~(pcntxt_mask & ~mask); + xsave_hdr->xstate_bv &= mask; + + return err; +fx_only: + /* + * Couldn't find the extended state information in the memory + * layout. Restore the FP/SSE and init the other extended state + * enabled by the OS. + */ + xsave_hdr->xstate_bv = XSTATE_FPSSE; + return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); +} + +int restore_i387_xstate_ia32(void __user *buf) { int err; struct task_struct *tsk = current; + struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; if (HAVE_HWFP) clear_fpu(tsk); + if (!buf) { + if (used_math()) { + clear_fpu(tsk); + clear_used_math(); + } + + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) + return -EACCES; + if (!used_math()) { err = init_fpu(tsk); if (err) @@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf) } if (HAVE_HWFP) { - if (cpu_has_fxsr) - err = restore_i387_fxsave(buf); + if (cpu_has_xsave) + err = restore_i387_xsave(buf); + else if (cpu_has_fxsr) + err = restore_i387_fxsave(fp, sizeof(struct + i387_fxsave_struct)); else - err = restore_i387_fsave(buf); + err = restore_i387_fsave(fp); } else { err = fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + NULL, fp) != 0; } set_used_math(); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index dc92b49d920..4b8a53d841f 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void) device_initcall(i8259A_init_sysfs); +void mask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void unmask_8259A(void) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ + outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + void init_8259A(int auto_eoi) { unsigned long flags; diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c..e710289f673 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -46,10 +46,13 @@ #include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> +#include <asm/setup.h> #include <mach_apic.h> #include <mach_apicdef.h> +#define __apicdebuginit(type) static type __init + int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -57,7 +60,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +DEFINE_SPINLOCK(vector_lock); int timer_through_8259 __initdata; @@ -1209,10 +1212,6 @@ static int assign_irq_vector(int irq) return vector; } -void setup_vector_irq(int cpu) -{ -} - static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 @@ -1345,7 +1344,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __init print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,9 +1460,7 @@ void __init print_IO_APIC(void) return; } -#if 0 - -static void print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1483,9 +1481,10 @@ static void print_APIC_bitfield(int base) } } -void /*__init*/ print_local_APIC(void *dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1494,7 +1493,7 @@ void /*__init*/ print_local_APIC(void *dummy) smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(read_apic_id())); + GET_APIC_ID(v)); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1536,10 +1535,9 @@ void /*__init*/ print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); @@ -1567,12 +1565,12 @@ void /*__init*/ print_local_APIC(void *dummy) printk("\n"); } -void print_all_local_APICs(void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void /*__init*/ print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1604,7 +1602,17 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + static void __init enable_IO_APIC(void) { @@ -1702,8 +1710,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(read_apic_id()); + entry.dest.physical.physical_dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -1729,10 +1736,8 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned char old_id; unsigned long flags; -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) return; -#endif /* * Don't check I/O APIC IDs for xAPIC systems. They have @@ -2333,8 +2338,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d170..a1bec2969c6 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -37,6 +37,7 @@ #include <acpi/acpi_bus.h> #endif #include <linux/bootmem.h> +#include <linux/dmar.h> #include <asm/idle.h> #include <asm/io.h> @@ -49,10 +50,13 @@ #include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> +#include <asm/irq_remapping.h> #include <mach_ipi.h> #include <mach_apic.h> +#define __apicdebuginit(type) static type __init + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -87,8 +91,6 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; -#define __apicdebuginit __init - int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -101,13 +103,16 @@ int timer_through_8259 __initdata; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_SPINLOCK(vector_lock); /* * # of IRQ routing registers */ int nr_ioapic_registers[MAX_IO_APICS]; +/* I/O APIC RTE contents at the OS boot up */ +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* I/O APIC entries */ struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; @@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) pin = entry->pin; if (pin == -1) break; - io_apic_write(apic, 0x11 + pin*2, dest); + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -440,6 +450,69 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} + int skip_ioapic_setup; int ioapic_force; @@ -697,6 +770,19 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + spin_unlock(&vector_lock); +} + static int __assign_irq_vector(int irq, cpumask_t mask) { /* @@ -802,7 +888,7 @@ static void __clear_irq_vector(int irq) cpus_clear(cfg->domain); } -static void __setup_vector_irq(int cpu) +void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ @@ -825,27 +911,99 @@ static void __setup_vector_irq(int cpu) } } -void setup_vector_irq(int cpu) -{ - spin_lock(&vector_lock); - __setup_vector_irq(smp_processor_id()); - spin_unlock(&vector_lock); -} - - static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) { + if (trigger) irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { + else irq_desc[irq].status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if (trigger) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, @@ -870,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, irq, trigger, polarity); - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest = cpu_mask_to_apicid(mask); - entry.mask = 0; /* enable IRQ */ - entry.trigger = trigger; - entry.polarity = polarity; - entry.vector = cfg->vector; - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry.mask = 1; + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } ioapic_register_intr(irq, trigger); if (irq < 16) @@ -939,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; + if (intr_remapping_enabled) + return; + memset(&entry, 0, sizeof(entry)); /* @@ -965,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __apicdebuginit print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1059,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void) return; } -#if 0 - -static __apicdebuginit void print_APIC_bitfield (int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1082,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base) } } -void __apicdebuginit print_local_APIC(void * dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + unsigned long icr; if (apic_verbosity == APIC_QUIET) return; @@ -1092,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1128,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); @@ -1159,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void __apicdebuginit print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1196,7 +1347,17 @@ void __apicdebuginit print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + void __init enable_IO_APIC(void) { @@ -1286,7 +1447,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = GET_APIC_ID(read_apic_id()); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -1392,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct irq_desc *desc = irq_desc + irq; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte = desc->status & IRQ_LEVEL; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + + ret = 0; + irq_desc[irq].status &= ~IRQ_MOVE_PENDING; + cpus_clear(irq_desc[irq].pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_desc + irq; + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, + irq_desc[irq].pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + if (irq_desc[irq].status & IRQ_LEVEL) { + irq_desc[irq].status |= IRQ_MOVE_PENDING; + irq_desc[irq].pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; @@ -1448,6 +1750,17 @@ static void irq_complete_move(unsigned int irq) #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -1522,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + static inline void init_IO_APIC_traps(void) { int irq; @@ -1707,6 +2035,8 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -1733,6 +2063,8 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -1849,8 +2181,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } struct sysfs_ioapic_data { @@ -1972,6 +2302,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -1990,10 +2323,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if (!err) { - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + if (err) + return err; + + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | @@ -2044,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) write_msi_msg(irq, &msg); irq_desc[irq].affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + irq_desc[irq].affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2061,26 +2474,157 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) { + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +{ + int ret; struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_desc + irq; + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ int irq, ret; + irq = create_irq(); if (irq < 0) return irq; - ret = msi_compose_msg(dev, irq, &msg); +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; } + return 0; - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} + +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, sub_handle; + struct msi_desc *desc; +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq(); + if (irq < 0) + return irq; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } return 0; + +error: + destroy_irq(irq); + return ret; } void arch_teardown_msi_irq(unsigned int irq) @@ -2328,6 +2872,10 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif else set_ioapic_affinity_irq(irq, TARGET_CPUS); } diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 50e5e4a31c8..19191430274 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/thread_info.h> #include <linux/syscalls.h> +#include <asm/syscalls.h> /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 3f7537b669d..f1c688e46f3 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,6 +20,8 @@ #ifdef CONFIG_X86_32 #include <mach_apic.h> +#include <mach_ipi.h> + /* * the following functions deal with sending IPIs between CPUs. * @@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) } /* must come after the send_IPI functions above for inlining */ -#include <mach_ipi.h> static int convert_apicid_to_cpu(int apic_id) { int i; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d66914287ee..9200a1e2752 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -74,6 +74,15 @@ void __init init_ISA_irqs (void) } } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,6 +107,46 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); + /* setup after call gates are initialised (usually add in * the architecture specific gates) */ diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 43c019f85f0..6c27679ec6a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)p->ainsn.insn; } -/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); + kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3..0ed5f939b90 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -18,6 +18,7 @@ #include <asm/ldt.h> #include <asm/desc.h> #include <asm/mmu_context.h> +#include <asm/syscalls.h> #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) @@ -62,12 +63,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP - cpumask_of_cpu_ptr_declare(mask); - preempt_disable(); load_LDT(pc); - cpumask_of_cpu_ptr_next(mask, smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) + if (!cpus_equal(current->mm->cpu_vm_mask, + cpumask_of_cpu(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55a..0732adba05c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/numa.h> #include <linux/ftrace.h> +#include <linux/suspend.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -22,6 +23,7 @@ #include <asm/cpufeature.h> #include <asm/desc.h> #include <asm/system.h> +#include <asm/cacheflush.h> #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) static u32 kexec_pgd[1024] PAGE_ALIGNED; @@ -77,7 +79,7 @@ static void load_segments(void) /* * A architecture hook called to validate the * proposed image and prepare the control pages - * as needed. The pages for KEXEC_CONTROL_CODE_SIZE + * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE * have been allocated, but the segments have yet * been copied into the kernel. * @@ -85,10 +87,12 @@ static void load_segments(void) * reboot code buffer to allow us to avoid allocations * later. * - * Currently nothing. + * Make control page executable. */ int machine_kexec_prepare(struct kimage *image) { + if (nx_enabled) + set_pages_x(image->control_code_page, 1); return 0; } @@ -98,27 +102,54 @@ int machine_kexec_prepare(struct kimage *image) */ void machine_kexec_cleanup(struct kimage *image) { + if (nx_enabled) + set_pages_nx(image->control_code_page, 1); } /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; + int save_ftrace_enabled; + asmlinkage unsigned long + (*relocate_kernel_ptr)(unsigned long indirection_page, + unsigned long control_page, + unsigned long start_address, + unsigned int has_pae, + unsigned int preserve_context); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + save_processor_state(); +#endif - tracer_disable(); + save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + if (image->preserve_context) { +#ifdef CONFIG_X86_IO_APIC + /* We need to put APICs in legacy mode so that we can + * get timer interrupts in second kernel. kexec/kdump + * paths already have calls to disable_IO_APIC() in + * one form or other. kexec jump path also need + * one. + */ + disable_IO_APIC(); +#endif + } + control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE); + memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; + page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_PGD] = __pa(kexec_pgd); page_list[VA_PGD] = (unsigned long)kexec_pgd; #ifdef CONFIG_X86_PAE @@ -131,6 +162,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) page_list[VA_PTE_0] = (unsigned long)kexec_pte0; page_list[PA_PTE_1] = __pa(kexec_pte1); page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is @@ -149,8 +181,17 @@ NORET_TYPE void machine_kexec(struct kimage *image) set_idt(phys_to_virt(0),0); /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae); + image->start = relocate_kernel_ptr((unsigned long)image->head, + (unsigned long)page_list, + image->start, cpu_has_pae, + image->preserve_context); + +#ifdef CONFIG_KEXEC_JUMP + if (kexec_image->preserve_context) + restore_processor_state(); +#endif + + __ftrace_enabled_restore(save_ftrace_enabled); } void arch_crash_save_vmcoreinfo(void) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a..c43caa3a91f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ -NORET_TYPE void machine_kexec(struct kimage *image) +void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; void *control_page; diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f48..3b599518c32 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -33,6 +33,8 @@ #include <linux/module.h> #include <asm/geode.h> +#define MFGPT_DEFAULT_IRQ 7 + static struct mfgpt_timer_t { unsigned int avail:1; } mfgpt_timers[MFGPT_MAX_TIMERS]; @@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) } EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); -int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) +int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) { - u32 val, dummy; - int offset; + u32 zsel, lpc, dummy; + int shift; if (timer < 0 || timer >= MFGPT_MAX_TIMERS) return -EIO; - if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) return -EIO; - rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = MFGPT_DEFAULT_IRQ; - offset = (timer % 4) * 4; - - val &= ~((0xF << offset) | (0xF << (offset + 16))); + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + /* All chosen and checked - go for it */ + if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; if (enable) { - val |= (irq & 0x0F) << (offset); - val |= (irq & 0x0F) << (offset + 16); + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); } - wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); return 0; } @@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; static u16 mfgpt_event_clock; -static int irq = 7; +static int irq; static int __init mfgpt_setup(char *str) { get_option(&str, &irq); @@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) mfgpt_event_clock = timer; /* Set up the IRQ on the MFGPT side */ - if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { + if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); return -EIO; } @@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) &mfgpt_clockevent); printk(KERN_INFO - "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); + "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", + timer, irq); clockevents_register_device(&mfgpt_clockevent); return 0; err: - geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); + geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); printk(KERN_ERR "mfgpt-timer: Unable to set up the MFGPT clock source\n"); return -EIO; diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590..652fa5c38eb 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -388,7 +388,6 @@ static int do_microcode_update (void) void *new_mc = NULL; int cpu; cpumask_t old; - cpumask_of_cpu_ptr_declare(newmask); old = current->cpus_allowed; @@ -405,8 +404,7 @@ static int do_microcode_update (void) if (!uci->valid) continue; - cpumask_of_cpu_ptr_next(newmask, cpu); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); error = get_maching_microcode(new_mc, cpu); if (error < 0) goto out; @@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu) struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); unsigned int val[2]; int err = 0; @@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu) return 0; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); /* Check if the microcode we have in memory matches the CPU */ if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || @@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu) static void microcode_init_cpu(int cpu, int resume) { cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; old = current->cpus_allowed; - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); collect_cpu_info(cpu); if (uci->valid && system_state == SYSTEM_RUNNING && !resume) @@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev, if (end == buf) return -EINVAL; if (val == 1) { - cpumask_t old; - cpumask_of_cpu_ptr(newmask, cpu); - - old = current->cpus_allowed; + cpumask_t old = current->cpus_allowed; get_online_cpus(); - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); if (uci->valid) diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b36..efc2f361fe8 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { {} }; -void __init check_enable_amd_mmconf_dmi(void) +void __cpuinit check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed..f98f4e1dba0 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info(struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int apicid; char *bootup_cpu = ""; @@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) if (x86_quirks->mpc_oem_bus_info) x86_quirks->mpc_oem_bus_info(m, str); else - printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); + apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { @@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) static void print_MP_intsrc_info(struct mpc_config_intsrc *m) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, @@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) { - printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, @@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) { - printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," + apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC LINT %02x\n", m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, @@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) generic_bigsmp_probe(); #endif +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; @@ -484,7 +486,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) } -static void construct_ioapic_table(int mpc_default_type) +static void __init construct_ioapic_table(int mpc_default_type) { struct mpc_config_ioapic ioapic; struct mpc_config_bus bus; @@ -529,7 +531,7 @@ static void construct_ioapic_table(int mpc_default_type) construct_default_ioirq_mptable(mpc_default_type); } #else -static inline void construct_ioapic_table(int mpc_default_type) { } +static inline void __init construct_ioapic_table(int mpc_default_type) { } #endif static inline void __init construct_default_ISA_mptable(int mpc_default_type) @@ -695,7 +697,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; - printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", + bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 9fd80955244..2e2af5d1819 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -72,21 +72,28 @@ static ssize_t msr_read(struct file *file, char __user *buf, u32 data[2]; u32 reg = *ppos; int cpu = iminor(file->f_path.dentry->d_inode); - int err; + int err = 0; + ssize_t bytes = 0; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) - return -EIO; - if (copy_to_user(tmp, &data, 8)) - return -EFAULT; + if (err) { + if (err == -EFAULT) /* Fix idiotic error code */ + err = -EIO; + break; + } + if (copy_to_user(tmp, &data, 8)) { + err = -EFAULT; + break; + } tmp += 2; + bytes += 8; } - return ((char __user *)tmp) - buf; + return bytes ? bytes : err; } static ssize_t msr_write(struct file *file, const char __user *buf, @@ -96,21 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf, u32 data[2]; u32 reg = *ppos; int cpu = iminor(file->f_path.dentry->d_inode); - int err; + int err = 0; + ssize_t bytes = 0; if (count % 8) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) - return -EFAULT; + if (copy_from_user(&data, tmp, 8)) { + err = -EFAULT; + break; + } err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) - return -EIO; + if (err) { + if (err == -EFAULT) /* Fix idiotic error code */ + err = -EIO; + break; + } tmp += 2; + bytes += 8; } - return ((char __user *)tmp) - buf; + return bytes ? bytes : err; } static int msr_open(struct inode *inode, struct file *file) @@ -131,7 +145,7 @@ static int msr_open(struct inode *inode, struct file *file) ret = -EIO; /* MSR not supported */ out: unlock_kernel(); - return 0; + return ret; } /* diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ac6d51222e7..abb78a2cc4a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) } #endif +static void report_broken_nmi(int cpu, int *prev_nmi_count) +{ + printk(KERN_CONT "\n"); + + printk(KERN_WARNING + "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); + + printk(KERN_WARNING + "Please report this to bugzilla.kernel.org,\n"); + printk(KERN_WARNING + "and attach the output of the 'dmesg' command.\n"); + + per_cpu(wd_enabled, cpu) = 0; + atomic_dec(&nmi_active); +} + int __init check_nmi_watchdog(void) { unsigned int *prev_nmi_count; @@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void) for_each_online_cpu(cpu) { if (!per_cpu(wd_enabled, cpu)) continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk(KERN_WARNING "WARNING: CPU#%d: NMI " - "appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - get_nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } + if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) + report_broken_nmi(cpu, prev_nmi_count); } endflag = 1; if (!atomic_read(&nmi_active)) { diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c45610b20..4caff39078e 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -73,7 +73,7 @@ static void __init smp_dump_qct(void) } -void __init numaq_tsc_disable(void) +void __cpuinit numaq_tsc_disable(void) { if (!found_numaq) return; @@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } } +static int __init numaq_setup_ioapic_ids(void) +{ + /* so can skip it */ + return 1; +} + static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_bus_info = mpc_oem_bus_info, .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, + .setup_ioapic_ids = numaq_setup_ioapic_ids, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..6b0bb73998d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, @@ -373,8 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = { struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_read = native_apic_read, .setup_boot_clock = setup_boot_APIC_clock, .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, @@ -471,7 +470,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 151f2d171f7..dcdac6c826e 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/crash_dump.h> #include <linux/dma-mapping.h> #include <linux/bitops.h> #include <linux/pci_ids.h> @@ -36,6 +37,7 @@ #include <linux/delay.h> #include <linux/scatterlist.h> #include <linux/iommu-helper.h> + #include <asm/iommu.h> #include <asm/calgary.h> #include <asm/tce.h> @@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); static void calioc2_tce_cache_blast(struct iommu_table *tbl); static void calioc2_dump_error_regs(struct iommu_table *tbl); +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); +static void get_tce_space_from_tar(void); static struct cal_chipset_ops calgary_chip_ops = { .handle_quirks = calgary_handle_quirks, @@ -339,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { - printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " + WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); - WARN_ON(1); return; } @@ -410,22 +413,6 @@ static void calgary_unmap_sg(struct device *dev, } } -static int calgary_nontranslate_map_sg(struct device* dev, - struct scatterlist *sg, int nelems, int direction) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nelems, i) { - struct page *p = sg_page(s); - - BUG_ON(!p); - s->dma_address = virt_to_bus(sg_virt(s)); - s->dma_length = s->length; - } - return nelems; -} - static int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, int direction) { @@ -436,9 +423,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, unsigned long entry; int i; - if (!translation_enabled(tbl)) - return calgary_nontranslate_map_sg(dev, sg, nelems, direction); - for_each_sg(sg, s, nelems, i) { BUG_ON(!sg_page(s)); @@ -474,7 +458,6 @@ error: static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction) { - dma_addr_t dma_handle = bad_dma_address; void *vaddr = phys_to_virt(paddr); unsigned long uaddr; unsigned int npages; @@ -483,12 +466,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, uaddr = (unsigned long)vaddr; npages = num_dma_pages(uaddr, size); - if (translation_enabled(tbl)) - dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); - else - dma_handle = virt_to_bus(vaddr); - - return dma_handle; + return iommu_alloc(dev, tbl, vaddr, npages, direction); } static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, @@ -497,9 +475,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; - if (!translation_enabled(tbl)) - return; - npages = num_dma_pages(dma_handle, size); iommu_free(tbl, dma_handle, npages); } @@ -522,18 +497,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, goto error; memset(ret, 0, size); - if (translation_enabled(tbl)) { - /* set up tces to cover the allocated range */ - mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) - goto free; - - *dma_handle = mapping; - } else /* non translated slot */ - *dma_handle = virt_to_bus(ret); - + /* set up tces to cover the allocated range */ + mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); + if (mapping == bad_dma_address) + goto free; + *dma_handle = mapping; return ret; - free: free_pages((unsigned long)ret, get_order(size)); ret = NULL; @@ -541,7 +510,7 @@ error: return ret; } -static const struct dma_mapping_ops calgary_dma_ops = { +static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, @@ -830,7 +799,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) tbl = pci_iommu(dev->bus); tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; - tce_free(tbl, 0, tbl->it_size); + + if (is_kdump_kernel()) + calgary_init_bitmap_from_tce_table(tbl); + else + tce_free(tbl, 0, tbl->it_size); if (is_calgary(dev->device)) tbl->chip_ops = &calgary_chip_ops; @@ -1209,6 +1182,10 @@ static int __init calgary_init(void) if (ret) return ret; + /* Purely for kdump kernel case */ + if (is_kdump_kernel()) + get_tce_space_from_tar(); + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); if (!dev) @@ -1230,6 +1207,16 @@ static int __init calgary_init(void) goto error; } while (1); + dev = NULL; + for_each_pci_dev(dev) { + struct iommu_table *tbl; + + tbl = find_iommu_table(&dev->dev); + + if (translation_enabled(tbl)) + dev->dev.archdata.dma_ops = &calgary_dma_ops; + } + return ret; error: @@ -1251,6 +1238,7 @@ error: calgary_disable_translation(dev); calgary_free_bus(dev); pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ + dev->dev.archdata.dma_ops = NULL; } while (1); return ret; @@ -1280,13 +1268,15 @@ static inline int __init determine_tce_table_size(u64 ram) static int __init build_detail_arrays(void) { unsigned long ptr; - int i, scal_detail_size, rio_detail_size; + unsigned numnodes, i; + int scal_detail_size, rio_detail_size; - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + numnodes = rio_table_hdr->num_scal_dev; + if (numnodes > MAX_NUMNODES){ printk(KERN_WARNING "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", - MAX_NUMNODES, rio_table_hdr->num_scal_dev); + MAX_NUMNODES, numnodes); return -ENODEV; } @@ -1307,8 +1297,7 @@ static int __init build_detail_arrays(void) } ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; - i++, ptr += scal_detail_size) + for (i = 0; i < numnodes; i++, ptr += scal_detail_size) scal_devs[i] = (struct scal_detail *)ptr; for (i = 0; i < rio_table_hdr->num_rio_dev; @@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) return (val != 0xffffffff); } +/* + * calgary_init_bitmap_from_tce_table(): + * Funtion for kdump case. In the second/kdump kernel initialize + * the bitmap based on the tce table entries obtained from first kernel + */ +static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) +{ + u64 *tp; + unsigned int index; + tp = ((u64 *)tbl->it_base); + for (index = 0 ; index < tbl->it_size; index++) { + if (*tp != 0x0) + set_bit(index, tbl->it_map); + tp++; + } +} + +/* + * get_tce_space_from_tar(): + * Function for kdump case. Get the tce tables from first kernel + * by reading the contents of the base adress register of calgary iommu + */ +static void __init get_tce_space_from_tar(void) +{ + int bus; + void __iomem *target; + unsigned long tce_space; + + for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { + struct calgary_bus_info *info = &bus_info[bus]; + unsigned short pci_device; + u32 val; + + val = read_pci_config(bus, 0, 0, 0); + pci_device = (val & 0xFFFF0000) >> 16; + + if (!is_cal_pci_dev(pci_device)) + continue; + if (info->translation_disabled) + continue; + + if (calgary_bus_has_devices(bus, pci_device) || + translate_empty_slots) { + target = calgary_reg(bus_info[bus].bbar, + tar_offset(bus)); + tce_space = be64_to_cpu(readq(target)); + tce_space = tce_space & TAR_SW_BITS; + + tce_space = tce_space & (~specified_table_size); + info->tce_space = (u64 *)__va(tce_space); + } + } + return; +} + void __init detect_calgary(void) { int bus; @@ -1394,7 +1438,8 @@ void __init detect_calgary(void) return; } - specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); + specified_table_size = determine_tce_table_size((is_kdump_kernel() ? + saved_max_pfn : max_pfn) * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { struct calgary_bus_info *info = &bus_info[bus]; @@ -1412,10 +1457,16 @@ void __init detect_calgary(void) if (calgary_bus_has_devices(bus, pci_device) || translate_empty_slots) { - tbl = alloc_tce_table(); - if (!tbl) - goto cleanup; - info->tce_space = tbl; + /* + * If it is kdump kernel, find and use tce tables + * from first kernel, else allocate tce tables here + */ + if (!is_kdump_kernel()) { + tbl = alloc_tce_table(); + if (!tbl) + goto cleanup; + info->tce_space = tbl; + } calgary_found = 1; } } @@ -1430,6 +1481,10 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, debugging ? "enabled" : "disabled"); + + /* swiotlb for devices that aren't behind the Calgary. */ + if (max_pfn > MAX_DMA32_PFN) + swiotlb = 1; } return; @@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void) { int ret; - if (no_iommu || swiotlb) + if (no_iommu || (swiotlb && !calgary_detected)) return -ENODEV; if (!calgary_detected) @@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void) if (ret) { printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " "falling back to no_iommu\n", ret); - if (max_pfn > MAX_DMA32_PFN) - printk(KERN_ERR "WARNING more than 4GB of memory, " - "32bit PCI may malfunction.\n"); return ret; } force_iommu = 1; bad_dma_address = 0x0; - dma_ops = &calgary_dma_ops; + /* dma_ops is set to swiotlb or nommu */ + if (!dma_ops) + dma_ops = &nommu_dma_ops; return 0; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551b..87d4d6964ec 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,7 +11,7 @@ static int forbid_dac __read_mostly; -const struct dma_mapping_ops *dma_ops; +struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* @@ -192,126 +200,10 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -#ifdef CONFIG_X86_32 -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pos, err; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); - - pages >>= PAGE_SHIFT; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; - } - return (mem != NULL); -} - -static int dma_release_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - return 1; - } - return 0; -} -#else -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) -#define dma_release_coherent(dev, order, vaddr) (0) -#endif /* CONFIG_X86_32 */ - int dma_supported(struct device *dev, u64 mask) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { dev_info(dev, "PCI: Disallowing DAC for device\n"); @@ -319,8 +211,8 @@ int dma_supported(struct device *dev, u64 mask) } #endif - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); + if (ops->dma_supported) + return ops->dma_supported(dev, mask); /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. @@ -367,6 +259,7 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { + struct dma_mapping_ops *ops = get_dma_ops(dev); void *memory = NULL; struct page *page; unsigned long dma_mask = 0; @@ -376,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; if (!dev) { @@ -435,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Let low level make its own zone decisions */ gfp &= ~(GFP_DMA32|GFP_DMA); - if (dma_ops->alloc_coherent) - return dma_ops->alloc_coherent(dev, size, + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, dma_handle, gfp); return NULL; } @@ -448,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } } - if (dma_ops->alloc_coherent) { + if (ops->alloc_coherent) { free_pages((unsigned long)memory, get_order(size)); gfp &= ~(GFP_DMA|GFP_DMA32); - return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); + return ops->alloc_coherent(dev, size, dma_handle, gfp); } - if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), + if (ops->map_simple) { + *dma_handle = ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -477,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) + if (dma_release_from_coherent(dev, order, vaddr)) return; - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, bus, size, 0); + if (ops->unmap_single) + ops->unmap_single(dev, bus, size, 0); free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d..49285f8fd4d 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -67,9 +67,6 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP @@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) { - unsigned long npages = to_pages(phys_mem, size); + unsigned long npages = iommu_num_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(dev, npages); int i; @@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = to_pages(dma_addr, size); + npages = iommu_num_pages(dma_addr, size); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = to_pages(s->offset, s->length); + pages = iommu_num_pages(s->offset, s->length); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += to_pages(s->offset, s->length); + pages += iommu_num_pages(s->offset, s->length); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) @@ -692,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) extern int agp_amd64_init(void); -static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, +static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff..3f91f71cdc3 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -/* Make sure we keep the same behaviour */ -static int nommu_mapping_error(dma_addr_t dma_addr) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - return (dma_addr == bad_dma_address); -#endif -} - - -const struct dma_mapping_ops nommu_dma_ops = { +struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, .map_sg = nommu_map_sg, - .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c2..c4ce0332759 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } -const struct dma_mapping_ops swiotlb_dma_ops = { +struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..9f94bb1c811 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,7 +15,6 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; -static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 68f4ae2ac99..1962d27c6b8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,8 @@ #include <asm/tlbflush.h> #include <asm/cpu.h> #include <asm/kdebug.h> +#include <asm/syscalls.h> +#include <asm/smp.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -95,7 +97,6 @@ static inline void play_dead(void) { /* This must be done before dead CPU ack */ cpu_exit_clear(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; @@ -104,8 +105,8 @@ static inline void play_dead(void) * With physical CPU hotplug, we should halt the cpu */ local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 91ffce47af8..08a9df08adb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,7 @@ #include <asm/proto.h> #include <asm/ia32.h> #include <asm/idle.h> +#include <asm/syscalls.h> asmlinkage extern void ret_from_fork(void); @@ -93,14 +94,13 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); - wbinvd(); mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; local_irq_disable(); - while (1) - halt(); + /* mask all interrupts, flush any and all caches, and halt */ + wbinvd_halt(); } #else static inline void play_dead(void) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index ba19bb49bd0..58ce4b50211 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -69,7 +69,7 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); regno >>= 2; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 06a9f643817..724adfc63cb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -414,25 +414,20 @@ void native_machine_shutdown(void) /* The boot cpu is always logical cpu 0 */ int reboot_cpu_id = 0; - cpumask_of_cpu_ptr(newmask, reboot_cpu_id); #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_online(reboot_cpu)) { + cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } #endif /* Make certain the cpu I'm about to reboot on is online */ - if (!cpu_online(reboot_cpu_id)) { + if (!cpu_online(reboot_cpu_id)) reboot_cpu_id = smp_processor_id(); - cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); - } /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, newmask); + set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470..6f50664b2ba 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -20,11 +20,45 @@ #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAE_PGD_ATTR (_PAGE_PRESENT) +/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE + * ~ control_page + PAGE_SIZE are used as data storage and stack for + * jumping back + */ +#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) + +/* Minimal CPU state */ +#define ESP DATA(0x0) +#define CR0 DATA(0x4) +#define CR3 DATA(0x8) +#define CR4 DATA(0xc) + +/* other data */ +#define CP_VA_CONTROL_PAGE DATA(0x10) +#define CP_PA_PGD DATA(0x14) +#define CP_PA_SWAP_PAGE DATA(0x18) +#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) + .text .align PAGE_SIZE .globl relocate_kernel relocate_kernel: - movl 8(%esp), %ebp /* list of pages */ + /* Save the CPU context, used for jumping back */ + + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + pushf + + movl 20+8(%esp), %ebp /* list of pages */ + movl PTR(VA_CONTROL_PAGE)(%ebp), %edi + movl %esp, ESP(%edi) + movl %cr0, %eax + movl %eax, CR0(%edi) + movl %cr3, %eax + movl %eax, CR3(%edi) + movl %cr4, %eax + movl %eax, CR4(%edi) #ifdef CONFIG_X86_PAE /* map the control page at its virtual address */ @@ -138,15 +172,25 @@ relocate_kernel: relocate_new_kernel: /* read the arguments and say goodbye to the stack */ - movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* list of pages */ - movl 12(%esp), %edx /* start address */ - movl 16(%esp), %ecx /* cpu_has_pae */ + movl 20+4(%esp), %ebx /* page_list */ + movl 20+8(%esp), %ebp /* list of pages */ + movl 20+12(%esp), %edx /* start address */ + movl 20+16(%esp), %ecx /* cpu_has_pae */ + movl 20+20(%esp), %esi /* preserve_context */ /* zero out flags, and disable interrupts */ pushl $0 popfl + /* save some information for jumping back */ + movl PTR(VA_CONTROL_PAGE)(%ebp), %edi + movl %edi, CP_VA_CONTROL_PAGE(%edi) + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, CP_PA_PGD(%edi) + movl PTR(PA_SWAP_PAGE)(%ebp), %eax + movl %eax, CP_PA_SWAP_PAGE(%edi) + movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) + /* get physical address of control page now */ /* this is impossible after page table switch */ movl PTR(PA_CONTROL_PAGE)(%ebp), %edi @@ -197,8 +241,90 @@ identity_mapped: xorl %eax, %eax movl %eax, %cr3 + movl CP_PA_SWAP_PAGE(%edi), %eax + pushl %eax + pushl %ebx + call swap_pages + addl $8, %esp + + /* To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB, it's handy, and not processor dependent. + */ + xorl %eax, %eax + movl %eax, %cr3 + + /* set all of the registers to known values */ + /* leave %esp alone */ + + testl %esi, %esi + jnz 1f + xorl %edi, %edi + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp + ret +1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp +2: + call *%edx + + /* get the re-entry point of the peer system */ + movl 0(%esp), %ebp + call 1f +1: + popl %ebx + subl $(1b - relocate_kernel), %ebx + movl CP_VA_CONTROL_PAGE(%ebx), %edi + lea PAGE_SIZE(%ebx), %esp + movl CP_PA_SWAP_PAGE(%ebx), %eax + movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx + pushl %eax + pushl %edx + call swap_pages + addl $8, %esp + movl CP_PA_PGD(%ebx), %eax + movl %eax, %cr3 + movl %cr0, %eax + orl $(1<<31), %eax + movl %eax, %cr0 + lea PAGE_SIZE(%edi), %esp + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax + ret + +virtual_mapped: + movl CR4(%edi), %eax + movl %eax, %cr4 + movl CR3(%edi), %eax + movl %eax, %cr3 + movl CR0(%edi), %eax + movl %eax, %cr0 + movl ESP(%edi), %esp + movl %ebp, %eax + + popf + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + /* Do the copies */ - movl %ebx, %ecx +swap_pages: + movl 8(%esp), %edx + movl 4(%esp), %ecx + pushl %ebp + pushl %ebx + pushl %edi + pushl %esi + movl %ecx, %ebx jmp 1f 0: /* top, read another word from the indirection page */ @@ -226,27 +352,31 @@ identity_mapped: movl %ecx, %esi /* For every source page do a copy */ andl $0xfffff000, %esi + movl %edi, %eax + movl %esi, %ebp + + movl %edx, %edi movl $1024, %ecx rep ; movsl - jmp 0b - -3: - /* To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB, it's handy, and not processor dependent. - */ - xorl %eax, %eax - movl %eax, %cr3 + movl %ebp, %edi + movl %eax, %esi + movl $1024, %ecx + rep ; movsl - /* set all of the registers to known values */ - /* leave %esp alone */ + movl %eax, %edi + movl %edx, %esi + movl $1024, %ecx + rep ; movsl - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorl %esi, %esi - xorl %edi, %edi - xorl %ebp, %ebp + lea PAGE_SIZE(%ebp), %esi + jmp 0b +3: + popl %esi + popl %edi + popl %ebx + popl %ebp ret + + .globl kexec_control_code_size +.set kexec_control_code_size, . - relocate_kernel diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b520dae02bf..673f12cf6eb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void) * @size: Size of the crashkernel memory to reserve. * Returns the base address on success, and -1ULL on failure. */ -unsigned long long find_and_reserve_crashkernel(unsigned long long size) +unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) { const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long start = 0LL; @@ -670,6 +670,14 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif + /* after early param, so could get panic from serial */ reserve_early_setup_data(); @@ -731,6 +739,8 @@ void __init setup_arch(char **cmdline_p) num_physpages = max_pfn; check_efer(); + if (cpu_has_x2apic) + check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ @@ -788,10 +798,6 @@ void __init setup_arch(char **cmdline_p) initmem_init(0, max_pfn); -#ifdef CONFIG_X86_64 - dma32_reserve_bootmem(); -#endif - #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -806,20 +812,21 @@ void __init setup_arch(char **cmdline_p) #endif reserve_crashkernel(); +#ifdef CONFIG_X86_64 + /* + * dma32_reserve_bootmem() allocates bootmem which may conflict + * with the crashkernel command line, so do that after + * reserve_crashkernel() + */ + dma32_reserve_bootmem(); +#endif + reserve_ibft_region(); #ifdef CONFIG_KVM_CLOCK kvmclock_init(); #endif -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be after max_low_pfn is determined, and before kernel - * pagetables are setup. - */ - vmi_init(); -#endif - paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); paravirt_pagetable_setup_done(swapper_pg_dir); @@ -856,12 +863,6 @@ void __init setup_arch(char **cmdline_p) init_apic_mappings(); ioapic_init_mappings(); -#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) - if (def_to_bigsmp) - printk(KERN_WARNING "More than 8 CPUs detected and " - "CONFIG_X86_PC cannot handle it.\nUse " - "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); -#endif kvm_guest_init(); e820_reserve_resources(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c00..0e67f72d931 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -cpumask_t *cpumask_of_cpu_map __read_mostly; -EXPORT_SYMBOL(cpumask_of_cpu_map); - -/* requires nr_cpu_ids to be initialized */ -static void __init setup_cpumask_of_cpu(void) -{ - int i; - - /* alloc_bootmem zeroes memory */ - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); - for (i = 0; i < nr_cpu_ids; i++) - cpu_set(i, cpumask_of_cpu_map[i]); -} -#else -static inline void setup_cpumask_of_cpu(void) { } -#endif - #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -180,9 +162,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); } - else + else { ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); @@ -197,9 +186,6 @@ void __init setup_per_cpu_areas(void) /* Setup node to cpumask map */ setup_node_to_cpumask_map(); - - /* Setup cpumask_of_cpu map */ - setup_cpumask_of_cpu(); } #endif diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2d..6dd7e2b70a4 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -3,9 +3,18 @@ struct sigframe { char __user *pretcode; int sig; struct sigcontext sc; - struct _fpstate fpstate; + /* + * fpstate is unused. fpstate is moved/allocated after + * retcode[] below. This movement allows to have the FP state and the + * future state extensions (xsave) stay together. + * And at the same time retaining the unused fpstate, prevents changing + * the offset of extramask[] in the sigframe and thus prevent any + * legacy application accessing/modifying it. + */ + struct _fpstate fpstate_unused; unsigned long extramask[_NSIG_WORDS-1]; char retcode[8]; + /* fp state follows here */ }; struct rt_sigframe { @@ -15,13 +24,14 @@ struct rt_sigframe { void __user *puc; struct siginfo info; struct ucontext uc; - struct _fpstate fpstate; char retcode[8]; + /* fp state follows here */ }; #else struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; + /* fp state follows here */ }; #endif diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcdd893..8d380b699c0 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -26,6 +26,7 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/vdso.h> +#include <asm/syscalls.h> #include "sigframe.h" @@ -159,28 +160,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user *buf; + void __user *buf; err |= __get_user(buf, &sc->fpstate); - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage unsigned long sys_sigreturn(unsigned long __unused) @@ -262,7 +249,7 @@ badframe: * Set up a signal frame. */ static int -setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, +setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; @@ -289,7 +276,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); - tmp = save_i387(fpstate); + tmp = save_i387_xstate(fpstate); if (tmp < 0) err = 1; else @@ -306,7 +293,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + void **fpstate) { unsigned long sp; @@ -332,6 +320,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) sp = (unsigned long) ka->sa.sa_restorer; } + if (used_math()) { + sp = sp - sig_xstate_size; + *fpstate = (struct _fpstate *) sp; + } + sp -= frame_size; /* * Align the stack pointer according to the i386 ABI, @@ -350,8 +343,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, void __user *restorer; int err = 0; int usig; + void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -366,7 +360,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (err) goto give_sigsegv; - err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; @@ -427,8 +421,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, void __user *restorer; int err = 0; int usig; + void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -447,13 +442,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, goto give_sigsegv; /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index b45ef8ddd65..4665b598a37 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -26,6 +26,7 @@ #include <asm/proto.h> #include <asm/ia32_unistd.h> #include <asm/mce.h> +#include <asm/syscalls.h> #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -54,60 +55,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, } /* - * Signal frame handlers. - */ - -static inline int save_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); - - if ((unsigned long)buf % 16) - printk("save_i387: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - sizeof(struct i387_fxsave_struct))) - return -1; - } - return 1; -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); -} - -/* * Do a signal return; undo the signal stack. */ static int @@ -151,25 +98,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, { struct _fpstate __user * buf; err |= __get_user(buf, &sc->fpstate); - - if (buf) { - if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) - goto badframe; - err |= restore_i387(buf); - } else { - struct task_struct *me = current; - if (used_math()) { - clear_fpu(me); - clear_used_math(); - } - } + err |= restore_i387_xstate(buf); } err |= __get_user(*pax, &sc->ax); return err; - -badframe: - return 1; } asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) @@ -260,26 +193,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 16); + return (void __user *)round_down(sp - size, 64); } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + void __user *fp = NULL; int err = 0; struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sig_xstate_size); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) - goto give_sigsegv; - - if (save_i387(fp) < 0) + if (save_i387_xstate(fp) < 0) err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; @@ -294,7 +224,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); + if (cpu_has_xsave) + err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); + else + err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->sp), diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 332512767f4..aa804c64b16 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) #else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif @@ -123,13 +123,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); static atomic_t init_deasserted; -static int boot_cpu_logical_apicid; /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; /* Set if we find a B stepping CPU */ -int __cpuinitdata smp_b_stepping; +static int __cpuinitdata smp_b_stepping; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) @@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu) #endif #ifdef CONFIG_X86_32 +static int boot_cpu_logical_apicid; + u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void) /* * (This works even if the APIC is not enabled.) */ - phys_id = GET_APIC_ID(read_apic_id()); + phys_id = read_apic_id(); cpuid = smp_processor_id(); if (cpu_isset(cpuid, cpu_callin_map)) { panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, @@ -326,12 +327,16 @@ static void __cpuinit start_secondary(void *unused) * for which cpus receive the IPI. Holding this * lock helps us to not include this cpu in a currently in progress * smp_call_function(). + * + * We need to hold vector_lock so there the set of online cpus + * does not change while we are assigning vectors to cpus. Holding + * this lock ensures we don't half assign or remove an irq from a cpu. */ ipi_call_lock_irq(); -#ifdef CONFIG_X86_IO_APIC - setup_vector_irq(smp_processor_id()); -#endif + lock_vector_lock(); + __setup_vector_irq(smp_processor_id()); cpu_set(smp_processor_id(), cpu_online_map); + unlock_vector_lock(); ipi_call_unlock_irq(); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; @@ -546,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid) printk(KERN_CONT "a previous APIC delivery may have failed\n"); - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); - apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); + apic_icr_write(APIC_DM_REMRD | regs[i], apicid); timeout = 0; do { @@ -579,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) int maxlvt; /* Target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); + apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -636,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Turn INIT on target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* * Send IPI */ - apic_write(APIC_ICR, - APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, + phys_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -652,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) pr_debug("Deasserting INIT.\n"); /* Target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Send IPI */ - apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); @@ -698,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ /* Target chip */ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); - /* Boot on the stack */ /* Kick the second */ - apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); + apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), + phys_apicid); /* * Give the other CPU some time to accept the IPI. @@ -752,6 +749,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work) } #ifdef CONFIG_X86_64 + +/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ +static void __ref free_bootmem_pda(struct x8664_pda *oldpda) +{ + if (!after_bootmem) + free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); +} + /* * Allocate node local memory for the AP pda. * @@ -780,8 +785,7 @@ int __cpuinit get_local_pda(int cpu) if (oldpda) { memcpy(newpda, oldpda, size); - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, size); + free_bootmem_pda(oldpda); } newpda->in_bootmem = 0; @@ -1044,6 +1048,34 @@ static __init void disable_smp(void) static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); + +#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) + if (def_to_bigsmp && nr_cpu_ids > 8) { + unsigned int cpu; + unsigned nr; + + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" + "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + + nr = 0; + for_each_present_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_present_map); + nr++; + } + + nr = 0; + for_each_possible_cpu(cpu) { + if (nr >= 8) + cpu_clear(cpu, cpu_possible_map); + nr++; + } + + nr_cpu_ids = 8; + } +#endif + if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk(KERN_WARNING "weird, boot CPU (#%d) not listed" "by the BIOS.\n", hard_smp_processor_id()); @@ -1136,10 +1168,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ +#ifdef CONFIG_X86_32 boot_cpu_logical_apicid = logical_smp_processor_id(); +#endif current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); +#ifdef CONFIG_X86_64 + enable_IR_x2apic(); + setup_apic_routing(); +#endif + if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); @@ -1147,9 +1186,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } preempt_disable(); - if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { + if (read_apic_id() != boot_cpu_physical_apicid) { panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); + read_apic_id(), boot_cpu_physical_apicid); /* Or can we switch back to PIC here? */ } preempt_enable(); @@ -1182,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); + + if (is_uv_system()) + uv_system_init(); out: preempt_enable(); } @@ -1336,7 +1378,9 @@ int __cpu_disable(void) remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ + lock_vector_lock(); remove_cpu_from_maps(cpu); + unlock_vector_lock(); fixup_irqs(cpu_online_map); return 0; } @@ -1370,17 +1414,3 @@ void __cpu_die(unsigned int cpu) BUG(); } #endif - -/* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. - */ -static int __init parse_maxcpus(char *arg) -{ - extern unsigned int maxcpus; - - if (arg) - maxcpus = simple_strtoul(arg, NULL, 0); - return 0; -} -early_param("maxcpus", parse_maxcpus); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca..397e309839d 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -8,18 +8,21 @@ DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. */ +/* + * Initialize the CPU's GDT. This is either the boot CPU doing itself + * (still using the master per-cpu area), or a CPU doing it for a + * secondary which will soon come up. + */ __cpuinit void init_gdt(int cpu) { - struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct desc_struct gdt; - pack_descriptor(&gdt[GDT_ENTRY_PERCPU], - __per_cpu_offset[cpu], 0xFFFFF, + pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; - gdt[GDT_ENTRY_PERCPU].s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index d67ce5f044b..7b987852e87 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,7 @@ #include <linux/init.h> #include <asm/io.h> #include <asm/bios_ebda.h> -#include <asm/mach-summit/mach_mpparse.h> +#include <asm/summit/mpparse.h> static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7066cb855a6..1884a8d12bf 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,6 +22,8 @@ #include <linux/uaccess.h> #include <linux/unistd.h> +#include <asm/syscalls.h> + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef3381..c9288c883e2 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -16,6 +16,7 @@ #include <asm/uaccess.h> #include <asm/ia32.h> +#include <asm/syscalls.h> asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 170d43c1748..3d1be4f0fac 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -8,12 +8,12 @@ #define __NO_STUBS #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include <asm/unistd_64.h> #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ffe3c664afc..bbecf8b6bf9 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -36,6 +36,7 @@ #include <asm/arch_hooks.h> #include <asm/hpet.h> #include <asm/time.h> +#include <asm/timer.h> #include "do_timer.h" diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab..8b8c0d6640f 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -17,6 +17,7 @@ #include <asm/genapic.h> #include <asm/idle.h> #include <asm/tsc.h> +#include <asm/irq_vectors.h> #include <mach_apic.h> @@ -783,7 +784,7 @@ static int __init uv_bau_init(void) uv_init_blade(blade, node, cur_cpu); cur_cpu += uv_blade_nr_possible_cpus(blade); } - set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); + alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); uv_enable_timeouts(); return 0; diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index ab6bf375a30..6bb7b8579e7 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -10,6 +10,7 @@ #include <asm/ldt.h> #include <asm/processor.h> #include <asm/proto.h> +#include <asm/syscalls.h> #include "tls.h" diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 03df8e45e5a..da5a5964fcc 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1228,7 +1228,6 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); - init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420..b42068fb7b7 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -339,9 +339,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("\nCall Trace:\n"); + printk("Call Trace:\n"); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("\n"); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -386,6 +385,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -443,7 +443,6 @@ void show_registers(struct pt_regs *regs) printk("Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, regs->bp, ""); - printk("\n"); printk(KERN_EMERG "Code: "); @@ -1131,7 +1130,14 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ - restore_fpu_checking(&me->thread.xstate->fxsave); + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(me))) { + stts(); + force_sig(SIGSEGV, me); + return; + } task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } @@ -1166,10 +1172,6 @@ void __init trap_init(void) set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif /* - * initialize the per thread extended state: - */ - init_thread_xstate(); - /* * Should be a barrier for any external CPU state: */ cpu_init(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c055390..8f98e9de1b8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *pm, u64 *hpet) { u64 t1, t2; int i; @@ -122,80 +122,216 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } -/** - * native_calibrate_tsc - calibrate the tsc on boot +/* + * Try to calibrate the TSC against the Programmable + * Interrupt Timer and return the frequency of the TSC + * in kHz. + * + * Return ULONG_MAX on failure to calibrate. */ -unsigned long native_calibrate_tsc(void) +static unsigned long pit_calibrate_tsc(void) { - unsigned long flags; - u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(); - unsigned int tsc_khz_val = 0; - - local_irq_save(flags); - - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + u64 tsc, t1, t2, delta; + unsigned long tscmin, tscmax; + int pitcnt; + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); + /* + * Setup CTC channel 2* for mode 0, (interrupt on terminal + * count mode), binary count. Set the latch register to 50ms + * (LSB then MSB) to begin countdown. + */ outb(0xb0, 0x43); outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); - tr1 = get_cycles(); - while ((inb(0x61) & 0x20) == 0); - tr2 = get_cycles(); - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + tsc = t1 = t2 = get_cycles(); - local_irq_restore(flags); + pitcnt = 0; + tscmax = 0; + tscmin = ULONG_MAX; + while ((inb(0x61) & 0x20) == 0) { + t2 = get_cycles(); + delta = t2 - tsc; + tsc = t2; + if ((unsigned long) delta < tscmin) + tscmin = (unsigned int) delta; + if ((unsigned long) delta > tscmax) + tscmax = (unsigned int) delta; + pitcnt++; + } /* - * Preset the result with the raw and inaccurate PIT - * calibration value + * Sanity checks: + * + * If we were not able to read the PIT more than 5000 + * times, then we have been hit by a massive SMI + * + * If the maximum is 10 times larger than the minimum, + * then we got hit by an SMI as well. */ - delta = (tr2 - tr1); + if (pitcnt < 5000 || tscmax > 10 * tscmin) + return ULONG_MAX; + + /* Calculate the PIT value */ + delta = t2 - t1; do_div(delta, 50); - tsc_khz_val = delta; + return delta; +} + + +/** + * native_calibrate_tsc - calibrate the tsc on boot + */ +unsigned long native_calibrate_tsc(void) +{ + u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; + unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; + unsigned long flags; + int hpet = is_hpet_enabled(), i; + + /* + * Run 5 calibration loops to get the lowest frequency value + * (the best estimate). We use two different calibration modes + * here: + * + * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and + * load a timeout of 50ms. We read the time right after we + * started the timer and wait until the PIT count down reaches + * zero. In each wait loop iteration we read the TSC and check + * the delta to the previous read. We keep track of the min + * and max values of that delta. The delta is mostly defined + * by the IO time of the PIT access, so we can detect when a + * SMI/SMM disturbance happend between the two reads. If the + * maximum time is significantly larger than the minimum time, + * then we discard the result and have another try. + * + * 2) Reference counter. If available we use the HPET or the + * PMTIMER as a reference to check the sanity of that value. + * We use separate TSC readouts and check inside of the + * reference read for a SMI/SMM disturbance. We dicard + * disturbed values here as well. We do that around the PIT + * calibration delay loop as we have to wait for a certain + * amount of time anyway. + */ + for (i = 0; i < 5; i++) { + unsigned long tsc_pit_khz; + + /* + * Read the start value and the reference count of + * hpet/pmtimer when available. Then do the PIT + * calibration, which will take at least 50ms, and + * read the end value. + */ + local_irq_save(flags); + tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + tsc_pit_khz = pit_calibrate_tsc(); + tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + local_irq_restore(flags); + + /* Pick the lowest PIT TSC calibration so far */ + tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); + + /* hpet or pmtimer available ? */ + if (!hpet && !pm1 && !pm2) + continue; + + /* Check, whether the sampling was disturbed by an SMI */ + if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) + continue; + + tsc2 = (tsc2 - tsc1) * 1000000LL; + + if (hpet) { + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tsc1, 1000000); + } else { + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tsc1 = pm2 * 1000000000LL; + do_div(tsc1, PMTMR_TICKS_PER_SEC); + } + + do_div(tsc2, tsc1); + tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + } + + /* + * Now check the results. + */ + if (tsc_pit_min == ULONG_MAX) { + /* PIT gave no useful value */ + printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); + + /* We don't have an alternative source, disable TSC */ + if (!hpet && !pm1 && !pm2) { + printk("TSC: No reference (HPET/PMTIMER) available\n"); + return 0; + } + + /* The alternative source failed as well, disable TSC */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " + "failed due to SMI disturbance.\n"); + return 0; + } + + /* Use the alternative source */ + printk(KERN_INFO "TSC: using %s reference calibration\n", + hpet ? "HPET" : "PMTIMER"); + + return tsc_ref_min; + } - /* hpet or pmtimer available ? */ + /* We don't have an alternative source, use the PIT calibration value */ if (!hpet && !pm1 && !pm2) { - printk(KERN_INFO "TSC calibrated against PIT\n"); - goto out; + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { - printk(KERN_WARNING "TSC calibration disturbed by SMI, " - "using PIT calibration result\n"); - goto out; + /* The alternative source failed, use the PIT calibration value */ + if (tsc_ref_min == ULONG_MAX) { + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " + "to SMI disturbance. Using PIT calibration\n"); + return tsc_pit_min; } - tsc2 = (tsc2 - tsc1) * 1000000LL; - - if (hpet) { - printk(KERN_INFO "TSC calibrated against HPET\n"); - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 5% window, the we + * use the lower frequency of those as it is probably the + * closest estimate. + */ + if (delta >= 95 && delta <= 105) { + printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", + hpet ? "HPET" : "PMTIMER"); + printk(KERN_INFO "TSC: using %s calibration value\n", + tsc_pit_min <= tsc_ref_min ? "PIT" : + hpet ? "HPET" : "PMTIMER"); + return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; } - do_div(tsc2, tsc1); - tsc_khz_val = tsc2; + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); -out: - return tsc_khz_val; + /* + * The calibration values differ too much. In doubt, we use + * the PIT value as we know that there are PMTIMERs around + * running at double speed. + */ + printk(KERN_INFO "TSC: Using PIT calibration value\n"); + return tsc_pit_min; } - #ifdef CONFIG_X86_32 /* Only called from the Powernow K7 cpu freq driver */ int recalibrate_cpu_khz(void) @@ -314,7 +450,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu); return 0; } @@ -325,6 +461,10 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { + if (!cpu_has_tsc) + return 0; + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89..9ffb01c31c4 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) __raw_spin_unlock(&sync_lock); } } - if (!(now-start)) { - printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", + WARN(!(now-start), + "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", now-start, end-start); - WARN_ON(1); - } } /* diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c4..594ef47f0a6 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early) return 1; } -extern unsigned int __cpuinitdata maxcpus; - /* * The Visual Workstation is Intel MP compliant in the hardware * sense, but it doesn't have a BIOS(-configuration table). @@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve) ncpus = CO_CPU_MAX; } - if (ncpus > maxcpus) - ncpus = maxcpus; + if (ncpus > setup_max_cpus) + ncpus = setup_max_cpus; #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 38f566fa27d..4eeb5cf9720 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -46,6 +46,7 @@ #include <asm/io.h> #include <asm/tlbflush.h> #include <asm/irq.h> +#include <asm/syscalls.h> /* * Known problems: diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922..61531d5c950 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -37,6 +37,7 @@ #include <asm/timer.h> #include <asm/vmi_time.h> #include <asm/kmap_types.h> +#include <asm/setup.h> /* Convenient for calling VMI functions indirectly in the ROM */ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); @@ -683,7 +684,7 @@ void vmi_bringup(void) { /* We must establish the lowmem mapping for MMU ops to work */ if (vmi_ops.set_linear_mapping) - vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); + vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); } /* @@ -904,8 +905,8 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(pv_apic_ops.apic_read, APICRead); - para_fill(pv_apic_ops.apic_write, APICWrite); + para_fill(apic_ops->read, APICRead); + para_fill(apic_ops->write, APICWrite); #endif /* diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d..a9b8560adbc 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -140,10 +140,10 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - __x86cpuvendor_start = .; - *(.x86cpuvendor.init) - __x86cpuvendor_end = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; } SECURITY_INIT . = ALIGN(4); @@ -180,6 +180,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; @@ -209,3 +210,11 @@ SECTIONS DWARF_DEBUG } + +#ifdef CONFIG_KEXEC +/* Link time checks */ +#include <asm/kexec.h> + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 63e5c1a22e8..201e81a91a9 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -168,13 +168,12 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; - . = ALIGN(16); - __x86cpuvendor_start = .; - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - *(.x86cpuvendor.init) + __x86_cpu_dev_start = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + *(.x86_cpu_dev.init) } - __x86cpuvendor_end = .; SECURITY_INIT + __x86_cpu_dev_end = .; . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c new file mode 100644 index 00000000000..07713d64deb --- /dev/null +++ b/arch/x86/kernel/xsave.c @@ -0,0 +1,316 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha <suresh.b.siddha@intel.com> + */ +#include <linux/bootmem.h> +#include <linux/compat.h> +#include <asm/i387.h> +#ifdef CONFIG_IA32_EMULATION +#include <asm/sigcontext32.h> +#endif +#include <asm/xcr.h> + +/* + * Supported feature mask by the CPU and the kernel. + */ +u64 pcntxt_mask; + +struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +struct _fpx_sw_bytes fx_sw_reserved_ia32; +#endif + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw_user) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xsave_hdr_struct); + unsigned int magic2; + int err; + + err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], + sizeof(struct _fpx_sw_bytes)); + + if (err) + return err; + + /* + * First Magic check failed. + */ + if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) + return -1; + + /* + * Check for error scenarios. + */ + if (fx_sw_user->xstate_size < min_xstate_size || + fx_sw_user->xstate_size > xstate_size || + fx_sw_user->xstate_size > fx_sw_user->extended_size) + return -1; + + err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + fx_sw_user->extended_size - + FP_XSTATE_MAGIC2_SIZE)); + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (err || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + +#ifdef CONFIG_X86_64 +/* + * Signal frame handlers. + */ + +int save_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) + return -EACCES; + + BUG_ON(sig_xstate_size < xstate_size); + + if ((unsigned long)buf % 64) + printk("save_i387_xstate: bad fpstate %p\n", buf); + + if (!used_math()) + return 0; + clear_used_math(); /* trigger finit */ + if (task_thread_info(tsk)->status & TS_USEDFPU) { + /* + * Start with clearing the user buffer. This will present a + * clean context for the bytes not touched by the fxsave/xsave. + */ + __clear_user(buf, sig_xstate_size); + + if (task_thread_info(tsk)->status & TS_XSAVE) + err = xsave_user(buf); + else + err = fxsave_user(buf); + + if (err) + return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; + stts(); + } else { + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, + xstate_size)) + return -1; + } + + if (task_thread_info(tsk)->status & TS_XSAVE) { + struct _fpstate __user *fx = buf; + + err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 __user *) (buf + sig_xstate_size + - FP_XSTATE_MAGIC2_SIZE)); + } + + return 1; +} + +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE + * state. + */ +int restore_user_xstate(void __user *buf) +{ + struct _fpx_sw_bytes fx_sw_user; + u64 mask; + int err; + + if (((unsigned long)buf % 64) || + check_for_xstate(buf, buf, &fx_sw_user)) + goto fx_only; + + mask = fx_sw_user.xstate_bv; + + /* + * restore the state passed by the user. + */ + err = xrestore_user(buf, mask); + if (err) + return err; + + /* + * init the state skipped by the user. + */ + mask = pcntxt_mask & ~mask; + + xrstor_state(init_xstate_buf, mask); + + return 0; + +fx_only: + /* + * couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); + return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +} + +/* + * This restores directly out of user space. Exceptions are handled. + */ +int restore_i387_xstate(void __user *buf) +{ + struct task_struct *tsk = current; + int err = 0; + + if (!buf) { + if (used_math()) + goto clear; + return 0; + } else + if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) + return -EACCES; + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + + if (!(task_thread_info(current)->status & TS_USEDFPU)) { + clts(); + task_thread_info(current)->status |= TS_USEDFPU; + } + if (task_thread_info(tsk)->status & TS_XSAVE) + err = restore_user_xstate(buf); + else + err = fxrstor_checking((__force struct i387_fxsave_struct *) + buf); + if (unlikely(err)) { + /* + * Encountered an error while doing the restore from the + * user buffer, clear the fpu state. + */ +clear: + clear_fpu(tsk); + clear_used_math(); + } + return err; +} +#endif + +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void prepare_fx_sw_frame(void) +{ + int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + + FP_XSTATE_MAGIC2_SIZE; + + sig_xstate_size = sizeof(struct _fpstate) + size_extended; + +#ifdef CONFIG_IA32_EMULATION + sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; +#endif + + memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.xstate_bv = pcntxt_mask; + fx_sw_reserved.xstate_size = xstate_size; +#ifdef CONFIG_IA32_EMULATION + memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, + sizeof(struct _fpx_sw_bytes)); + fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; +#endif +} + +/* + * Represents init state for the supported extended state. + */ +struct xsave_struct *init_xstate_buf; + +#ifdef CONFIG_X86_64 +unsigned int sig_xstate_size = sizeof(struct _fpstate); +#endif + +/* + * Enable the extended processor state save/restore feature + */ +void __cpuinit xsave_init(void) +{ + if (!cpu_has_xsave) + return; + + set_in_cr4(X86_CR4_OSXSAVE); + + /* + * Enable all the features that the HW is capable of + * and the Linux kernel is aware of. + */ + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); +} + +/* + * setup the xstate image representing the init state + */ +void setup_xstate_init(void) +{ + init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; +} + +/* + * Enable and initialize the xsave feature. + */ +void __init xsave_cntxt_init(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + pcntxt_mask = eax + ((u64)edx << 32); + + if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", + pcntxt_mask); + BUG(); + } + + /* + * for now OS knows only about FP/SSE + */ + pcntxt_mask = pcntxt_mask & XCNTXT_MASK; + xsave_init(); + + /* + * Recompute the context size for enabled features + */ + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + + prepare_fx_sw_frame(); + + setup_xstate_init(); + + printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " + "cntxt size 0x%x\n", + pcntxt_mask, xstate_size); +} diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3..ce3251ce550 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b0e4ddca6c1..0bfe2bd305e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) account_shadowed(kvm, gfn); } +static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int need_tlb_flush = 0; + + while ((spte = rmap_next(kvm, rmapp, NULL))) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + need_tlb_flush = 1; + } + return need_tlb_flush; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + int (*handler)(struct kvm *kvm, unsigned long *rmapp)) +{ + int i; + int retval = 0; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. + */ + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + retval |= handler(kvm, &memslot->rmap[gfn_offset]); + retval |= handler(kvm, + &memslot->lpage_info[ + gfn_offset / + KVM_PAGES_PER_HPAGE].rmap_pde); + } + } + + return retval; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int young = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + int _young; + u64 _spte = *spte; + BUG_ON(!(_spte & PT_PRESENT_MASK)); + _young = _spte & PT_ACCESSED_MASK; + if (_young) { + young = 1; + clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); + } + spte = rmap_next(kvm, rmapp, spte); + } + return young; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + return kvm_handle_hva(kvm, hva, kvm_age_rmapp); +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { @@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) int r; int largepage = 0; pfn_t pfn; + unsigned long mmu_seq; down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); @@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, v, write, largepage, gfn, pfn, PT32E_ROOT_LEVEL); @@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) return r; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } @@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; + unsigned long mmu_seq; ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); if (is_error_pfn(pfn)) { @@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, return 1; } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); spin_unlock(&vcpu->kvm->mmu_lock); return r; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } static void nonpaging_free(struct kvm_vcpu *vcpu) @@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } + vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); @@ -1814,6 +1914,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) spin_unlock(&vcpu->kvm->mmu_lock); return r; } +EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { @@ -1870,6 +1971,12 @@ void kvm_enable_tdp(void) } EXPORT_SYMBOL_GPL(kvm_enable_tdp); +void kvm_disable_tdp(void) +{ + tdp_enabled = false; +} +EXPORT_SYMBOL_GPL(kvm_disable_tdp); + static void free_mmu_pages(struct kvm_vcpu *vcpu) { struct kvm_mmu_page *sp; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4d918220bae..4a814bff21f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pfn = vcpu->arch.update_pte.pfn; if (is_error_pfn(pfn)) return; + if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) + return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), @@ -343,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, shadow_addr = __pa(shadow_page->spt); shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; - *shadow_ent = shadow_pte; + set_shadow_pte(shadow_ent, shadow_pte); } mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, @@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; pfn_t pfn; int largepage = 0; + unsigned long mmu_seq; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* implicit mb(), we'll read before PT lock is unlocked */ pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); up_read(¤t->mm->mmap_sem); @@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, } spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu, mmu_seq)) + goto out_unlock; kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, largepage, &write_pt, pfn); @@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_unlock(&vcpu->kvm->mmu_lock); return write_pt; + +out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + return 0; } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b756e876dce..e2ee264740c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -453,7 +453,8 @@ static __init int svm_hardware_setup(void) if (npt_enabled) { printk(KERN_INFO "kvm: Nested Paging enabled\n"); kvm_enable_tdp(); - } + } else + kvm_disable_tdp(); return 0; @@ -1007,10 +1008,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) struct kvm *kvm = svm->vcpu.kvm; u64 fault_address; u32 error_code; + bool event_injection = false; if (!irqchip_in_kernel(kvm) && - is_external_interrupt(exit_int_info)) + is_external_interrupt(exit_int_info)) { + event_injection = true; push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); + } fault_address = svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; @@ -1024,6 +1028,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) (u32)fault_address, (u32)(fault_address >> 32), handler); + if (event_injection) + kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0cac6370171..2a69773e3b2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); + if (vect_info & VECTORING_INFO_VALID_MASK) + kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } @@ -3116,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return ERR_PTR(-ENOMEM); allocate_vpid(vmx); - if (id == 0 && vm_need_ept()) { - kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); - kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, - VMX_EPT_FAKE_DIRTY_MASK, 0ull, - VMX_EPT_EXECUTABLE_MASK); - kvm_enable_tdp(); - } err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) @@ -3303,8 +3296,17 @@ static int __init vmx_init(void) vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); - if (cpu_has_vmx_ept()) + if (vm_need_ept()) { bypass_guest_pf = 0; + kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | + VMX_EPT_WRITABLE_MASK | + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, + VMX_EPT_FAKE_DIRTY_MASK, 0ull, + VMX_EPT_EXECUTABLE_MASK); + kvm_enable_tdp(); + } else + kvm_disable_tdp(); if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f1cdb011cf..0d682fc6aeb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: + case KVM_CAP_SYNC_MMU: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, goto out; down_write(&kvm->slots_lock); + spin_lock(&kvm->mmu_lock); p = &kvm->arch.aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, break; kvm->arch.naliases = n; + spin_unlock(&kvm->mmu_lock); kvm_mmu_zap_all(kvm); up_write(&kvm->slots_lock); @@ -3184,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, kvm_desct->base |= seg_desc->base2 << 24; kvm_desct->limit = seg_desc->limit0; kvm_desct->limit |= seg_desc->limit << 16; + if (seg_desc->g) { + kvm_desct->limit <<= 12; + kvm_desct->limit |= 0xfff; + } kvm_desct->selector = selector; kvm_desct->type = seg_desc->type; kvm_desct->present = seg_desc->p; @@ -3223,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct desc_struct *seg_desc) { + gpa_t gpa; struct descriptor_table dtable; u16 index = selector >> 3; @@ -3232,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); return 1; } - return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); + gpa += index * 8; + return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8); } /* allowed just for 8 bytes segments */ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct desc_struct *seg_desc) { + gpa_t gpa; struct descriptor_table dtable; u16 index = selector >> 3; @@ -3246,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, if (dtable.limit < index * 8 + 7) return 1; - return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); + gpa += index * 8; + return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8); } static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, @@ -3258,55 +3271,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, base_addr |= (seg_desc->base1 << 16); base_addr |= (seg_desc->base2 << 24); - return base_addr; -} - -static int load_tss_segment32(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_32 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_read_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_32)); -} - -static int save_tss_segment32(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_32 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_write_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_32)); -} - -static int load_tss_segment16(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_16 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_read_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_16)); -} - -static int save_tss_segment16(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_16 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_write_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_16)); + return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); } static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) @@ -3466,20 +3431,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, } static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, - struct desc_struct *cseg_desc, + u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_16 tss_segment_16; int ret = 0; - if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) + if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, + sizeof tss_segment_16)) goto out; save_state_to_tss16(vcpu, &tss_segment_16); - save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); - if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) + if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, + sizeof tss_segment_16)) + goto out; + + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_16, sizeof tss_segment_16)) goto out; + if (load_state_from_tss16(vcpu, &tss_segment_16)) goto out; @@ -3489,20 +3460,26 @@ out: } static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, - struct desc_struct *cseg_desc, + u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_32 tss_segment_32; int ret = 0; - if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) + if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, + sizeof tss_segment_32)) goto out; save_state_to_tss32(vcpu, &tss_segment_32); - save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); - if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) + if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, + sizeof tss_segment_32)) goto out; + + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_32, sizeof tss_segment_32)) + goto out; + if (load_state_from_tss32(vcpu, &tss_segment_32)) goto out; @@ -3517,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) struct desc_struct cseg_desc; struct desc_struct nseg_desc; int ret = 0; + u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); + u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); - kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); + old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); + /* FIXME: Handle errors. Failure to read either TSS or their + * descriptors should generate a pagefault. + */ if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) goto out; - if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) + if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) goto out; - if (reason != TASK_SWITCH_IRET) { int cpl; @@ -3544,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { cseg_desc.type &= ~(1 << 1); //clear the B flag - save_guest_segment_descriptor(vcpu, tr_seg.selector, - &cseg_desc); + save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); } if (reason == TASK_SWITCH_IRET) { @@ -3557,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->cache_regs(vcpu); if (nseg_desc.type & 8) - ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, &nseg_desc); else - ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, &nseg_desc); if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { @@ -3995,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + unsigned long userspace_addr; + down_write(¤t->mm->mmap_sem); - memslot->userspace_addr = do_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - 0); + userspace_addr = do_mmap(NULL, 0, + npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0); up_write(¤t->mm->mmap_sem); - if (IS_ERR((void *)memslot->userspace_addr)) - return PTR_ERR((void *)memslot->userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); + + /* set userspace_addr atomically for kvm_hva_to_rmapp */ + spin_lock(&kvm->mmu_lock); + memslot->userspace_addr = userspace_addr; + spin_unlock(&kvm->mmu_lock); } else { if (!old.user_alloc && old.rmap) { int ret; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0313a5eec41..65f0b8a47be 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,6 +55,7 @@ #include <linux/lguest_launcher.h> #include <linux/virtio_console.h> #include <linux/pm.h> +#include <asm/apic.h> #include <asm/lguest.h> #include <asm/paravirt.h> #include <asm/param.h> @@ -783,14 +784,44 @@ static void lguest_wbinvd(void) * code qualifies for Advanced. It will also never interrupt anything. It * does, however, allow us to get through the Linux boot code. */ #ifdef CONFIG_X86_LOCAL_APIC -static void lguest_apic_write(unsigned long reg, u32 v) +static void lguest_apic_write(u32 reg, u32 v) { } -static u32 lguest_apic_read(unsigned long reg) +static u32 lguest_apic_read(u32 reg) { return 0; } + +static u64 lguest_apic_icr_read(void) +{ + return 0; +} + +static void lguest_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void lguest_apic_wait_icr_idle(void) +{ + return; +} + +static u32 lguest_apic_safe_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops lguest_basic_apic_ops = { + .read = lguest_apic_read, + .write = lguest_apic_write, + .icr_read = lguest_apic_icr_read, + .icr_write = lguest_apic_icr_write, + .wait_icr_idle = lguest_apic_wait_icr_idle, + .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, +}; #endif /* STOP! Until an interrupt comes in. */ @@ -990,8 +1021,7 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ - pv_apic_ops.apic_write = lguest_apic_write; - pv_apic_ops.apic_read = lguest_apic_read; + apic_ops = &lguest_basic_apic_ops; #endif /* time operations */ @@ -1014,6 +1044,9 @@ __init void lguest_init(void) init_pg_tables_start = __pa(pg0); init_pg_tables_end = __pa(pg0); + /* As described in head_32.S, we map the first 128M of memory. */ + max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; + /* Load the %fs segment register (the per-cpu segment register) with * the normal data segment to get through booting. */ asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index aa3fa411942..55e11aa6d66 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y) lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o - - CFLAGS_csum-partial_64.o := -funroll-loops - lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dfdf428975c..f118c110af3 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -52,7 +52,7 @@ jnz 100b 102: .section .fixup,"ax" -103: addl %r8d,%edx /* ecx is zerorest also */ +103: addl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 40e0e309d27..cb0c112386f 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -32,7 +32,7 @@ jnz 100b 102: .section .fixup,"ax" -103: addl %r8d,%edx /* ecx is zerorest also */ +103: addl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous @@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache) jmp 60f 50: movl %ecx,%edx 60: sfence - movl %r8d,%ecx jmp copy_user_handle_tail .previous diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c index d5a2b39f882..01b868ba82f 100644 --- a/arch/x86/lib/msr-on-cpu.c +++ b/arch/x86/lib/msr-on-cpu.c @@ -30,10 +30,11 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) rv.msr_no = msr_no; if (safe) { - smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - err = rv.err; + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, + &rv, 1); + err = err ? err : rv.err; } else { - smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); } *l = rv.l; *h = rv.h; @@ -64,23 +65,24 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) rv.l = l; rv.h = h; if (safe) { - smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); - err = rv.err; + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, + &rv, 1); + err = err ? err : rv.err; } else { - smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); } return err; } -void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { - _wrmsr_on_cpu(cpu, msr_no, l, h, 0); + return _wrmsr_on_cpu(cpu, msr_no, l, h, 0); } -void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { - _rdmsr_on_cpu(cpu, msr_no, l, h, 0); + return _rdmsr_on_cpu(cpu, msr_no, l, h, 0); } /* These "safe" variants are slower and should be used when the target MSR diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 24e60944971..9e68075544f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -14,6 +14,13 @@ #include <asm/uaccess.h> #include <asm/mmx.h> +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) { #ifdef CONFIG_X86_INTEL_USERCOPY diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 3d317836be9..37b9ae4d44c 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,13 +10,15 @@ #include <asm/e820.h> #include <asm/setup.h> +#include <mach_ipi.h> + #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) #else #define DEFAULT_SEND_IPI (0) #endif -int no_broadcast=DEFAULT_SEND_IPI; +int no_broadcast = DEFAULT_SEND_IPI; /** * pre_intr_init_hook - initialisation prior to setting up interrupt vectors @@ -36,15 +38,6 @@ void __init pre_intr_init_hook(void) init_ISA_irqs(); } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - /** * intr_init_hook - post gate setup interrupt initialisation * @@ -60,12 +53,6 @@ void __init intr_init_hook(void) if (x86_quirks->arch_intr_init()) return; } -#ifdef CONFIG_X86_LOCAL_APIC - apic_intr_init(); -#endif - - if (!acpi_ioapic) - setup_irq(2, &irq2); } /** diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile deleted file mode 100644 index 3ef8b43b62f..00000000000 --- a/arch/x86/mach-es7000/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-$(CONFIG_X86_ES7000) := es7000plat.o diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/mach-es7000/es7000.h deleted file mode 100644 index c8d5aa132fa..00000000000 --- a/arch/x86/mach-es7000/es7000.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) - -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; -}; - -struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; -}; - -#ifdef CONFIG_ACPI - -struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -#endif - -struct mip_reg { - unsigned long long off_0; - unsigned long long off_8; - unsigned long long off_10; - unsigned long long off_18; - unsigned long long off_20; - unsigned long long off_28; - unsigned long long off_30; - unsigned long long off_38; -}; - -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) - -extern int parse_unisys_oem (char *oemptr); -extern void setup_unisys(void); -extern int es7000_start_cpu(int cpu, unsigned long eip); -extern void es7000_sw_apic(void); diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 0dbd7803a1d..6730f4e7c74 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -9,4 +9,3 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o -obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 59d77171455..df37fc9d6a2 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -5,18 +5,17 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> -#include <asm/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> #include <asm/apicdef.h> #include <linux/kernel.h> -#include <linux/smp.h> #include <linux/init.h> #include <linux/dmi.h> -#include <asm/mach-bigsmp/mach_apic.h> -#include <asm/mach-bigsmp/mach_apicdef.h> -#include <asm/mach-bigsmp/mach_ipi.h> +#include <asm/bigsmp/apicdef.h> +#include <linux/smp.h> +#include <asm/bigsmp/apic.h> +#include <asm/bigsmp/ipi.h> #include <asm/mach-default/mach_mpparse.h> static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4742626f08c..520cca0ee04 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -4,20 +4,19 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> -#include <asm/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> #include <asm/apicdef.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/smp.h> #include <linux/init.h> -#include <asm/mach-es7000/mach_apicdef.h> -#include <asm/mach-es7000/mach_apic.h> -#include <asm/mach-es7000/mach_ipi.h> -#include <asm/mach-es7000/mach_mpparse.h> -#include <asm/mach-es7000/mach_wakecpu.h> +#include <asm/es7000/apicdef.h> +#include <linux/smp.h> +#include <asm/es7000/apic.h> +#include <asm/es7000/ipi.h> +#include <asm/es7000/mpparse.h> +#include <asm/es7000/wakecpu.h> static int probe_es7000(void) { diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8091e68764c..8cf58394975 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -4,7 +4,6 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> @@ -12,11 +11,12 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/init.h> -#include <asm/mach-numaq/mach_apic.h> -#include <asm/mach-numaq/mach_apicdef.h> -#include <asm/mach-numaq/mach_ipi.h> -#include <asm/mach-numaq/mach_mpparse.h> -#include <asm/mach-numaq/mach_wakecpu.h> +#include <asm/numaq/apicdef.h> +#include <linux/smp.h> +#include <asm/numaq/apic.h> +#include <asm/numaq/ipi.h> +#include <asm/numaq/mpparse.h> +#include <asm/numaq/wakecpu.h> #include <asm/numaq.h> static int mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index a97ea0f35b1..6ad6b67a723 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -4,19 +4,18 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> -#include <asm/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> #include <asm/apicdef.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/smp.h> #include <linux/init.h> -#include <asm/mach-summit/mach_apic.h> -#include <asm/mach-summit/mach_apicdef.h> -#include <asm/mach-summit/mach_ipi.h> -#include <asm/mach-summit/mach_mpparse.h> +#include <asm/summit/apicdef.h> +#include <linux/smp.h> +#include <asm/summit/apic.h> +#include <asm/summit/ipi.h> +#include <asm/summit/mpparse.h> static int probe_summit(void) { diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c index a037041817c..4f4e50c3ad3 100644 --- a/arch/x86/mach-rdc321x/platform.c +++ b/arch/x86/mach-rdc321x/platform.c @@ -25,7 +25,6 @@ #include <linux/list.h> #include <linux/device.h> #include <linux/platform_device.h> -#include <linux/version.h> #include <linux/leds.h> #include <asm/gpio.h> diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 1fbb844c3d7..dfb932dcf13 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,5 +1,5 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o + pat.o pgtable.o gup.o obj-$(CONFIG_X86_32) += pgtable_32.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 455f3fe67b4..8f92cac4e6d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -35,6 +35,7 @@ #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm-generic/sections.h> +#include <asm/traps.h> /* * Page fault error code bits @@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address) return 0; } -void do_invalid_op(struct pt_regs *, unsigned long); - static int is_f00f_bug(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 00000000000..007bb06c750 --- /dev/null +++ b/arch/x86/mm/gup.c @@ -0,0 +1,298 @@ +/* + * Lockless get_user_pages_fast for x86 + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/highmem.h> + +#include <asm/pgtable.h> + +static inline pte_t gup_get_pte(pte_t *ptep) +{ +#ifndef CONFIG_X86_PAE + return *ptep; +#else + /* + * With get_user_pages_fast, we walk down the pagetables without taking + * any locks. For this we would like to load the pointers atoimcally, + * but that is not possible (without expensive cmpxchg8b) on PAE. What + * we do have is the guarantee that a pte will only either go from not + * present to present, or present to not present or both -- it will not + * switch to a completely different present page without a TLB flush in + * between; something that we are blocking by holding interrupts off. + * + * Setting ptes from not present to present goes: + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees l iff pte_high + * sees h. We load pte_high *after* loading pte_low, which ensures we + * don't see an older value of pte_high. *Then* we recheck pte_low, + * which ensures that we haven't picked up a changed pte high. We might + * have got rubbish values from pte_low and pte_high, but we are + * guaranteed that pte_low will not have the present bit set *unless* + * it is 'l'. And get_user_pages_fast only operates on present ptes, so + * we're safe. + * + * gup_get_pte should not be used or copied outside gup.c without being + * very careful -- it does not atomically load the pte or anything that + * is likely to be useful for you. + */ + pte_t pte; + +retry: + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + if (unlikely(pte.pte_low != ptep->pte_low)) + goto retry; + + return pte; +#endif +} + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + pte_t *ptep; + + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask |= _PAGE_RW; + + ptep = pte_offset_map(&pmd, addr); + do { + pte_t pte = gup_get_pte(ptep); + struct page *page; + + if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { + pte_unmap(ptep); + return 0; + } + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + get_page(page); + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + pte_unmap(ptep - 1); + + return 1; +} + +static inline void get_head_page_multiple(struct page *page, int nr) +{ + VM_BUG_ON(page != compound_head(page)); + VM_BUG_ON(page_count(page) == 0); + atomic_add(nr, &page->_count); +} + +static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + pte_t pte = *(pte_t *)&pmd; + struct page *head, *page; + int refs; + + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask |= _PAGE_RW; + if ((pte_val(pte) & mask) != mask) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + get_head_page_multiple(head, refs); + + return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (unlikely(pmd_large(pmd))) { + if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) + return 0; + } else { + if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + return 0; + } + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static noinline int gup_huge_pud(pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask; + pte_t pte = *(pte_t *)&pud; + struct page *head, *page; + int refs; + + mask = _PAGE_PRESENT|_PAGE_USER; + if (write) + mask |= _PAGE_RW; + if ((pte_val(pte) & mask) != mask) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + get_head_page_multiple(head, refs); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (unlikely(pud_large(pud))) { + if (!gup_huge_pud(pud, addr, next, write, pages, nr)) + return 0; + } else { + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } + } while (pudp++, addr = next, addr != end); + + return 1; +} + +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + start, len))) + goto slow_irqon; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed on x86. + * + * So long as we atomically load page table pointers versus teardown + * (which we do on x86, with the above PAE exception), we can follow the + * address down to the the page and take a ref on it. + */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d37f29376b0..4974e97dedf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -47,6 +47,7 @@ #include <asm/paravirt.h> #include <asm/setup.h> #include <asm/cacheflush.h> +#include <asm/smp.h> unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec37121f670..1f6806b62eb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -int direct_gbpages __meminitdata +int direct_gbpages #ifdef CONFIG_DIRECT_GBPAGES = 1 #endif @@ -86,46 +86,69 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. */ -void show_mem(void) +int after_bootmem; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* + * noexec=on|off + * Control non-executable mappings for 64-bit processes. + * + * on Enable (default) + * off Disable + */ +static int __init nonx_setup(char *str) { - long i, total = 0, reserved = 0; - long shared = 0, cached = 0; - struct page *page; - pg_data_t *pgdat; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); - for_each_online_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; ++i) { - /* - * This loop can take a while with 256 GB and - * 4k pages so defer the NMI watchdog: - */ - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) - touch_nmi_watchdog(); - - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - - page = pfn_to_page(pgdat->node_start_pfn + i); - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; } - printk(KERN_INFO "%lu pages of RAM\n", total); - printk(KERN_INFO "%lu reserved pages\n", reserved); - printk(KERN_INFO "%lu pages shared\n", shared); - printk(KERN_INFO "%lu pages swap cached\n", cached); + return 0; } +early_param("noexec", nonx_setup); -int after_bootmem; +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +int force_personality32; + +/* + * noexec32=on|off + * Control non executable heap for 32bit processes. + * To control the stack too use noexec=off + * + * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) + * off PROT_READ implies PROT_EXEC + */ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); -static __init void *spp_getpage(void) +/* + * NOTE: This function is marked __ref because it calls __init function + * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. + */ +static __ref void *spp_getpage(void) { void *ptr; @@ -274,7 +297,7 @@ static unsigned long __initdata table_start; static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; -static __meminit void *alloc_low_page(unsigned long *phys) +static __ref void *alloc_low_page(unsigned long *phys) { unsigned long pfn = table_end++; void *adr; @@ -295,7 +318,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) return adr; } -static __meminit void unmap_low_page(void *adr) +static __ref void unmap_low_page(void *adr) { if (after_bootmem) return; @@ -351,6 +374,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, { unsigned long pages = 0; unsigned long last_map_addr = end; + unsigned long start = address; int i = pmd_index(address); @@ -368,16 +392,24 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, } if (pmd_val(*pmd)) { - if (!pmd_large(*pmd)) + if (!pmd_large(*pmd)) { + spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pte_update(pmd, address, - end); + end); + spin_unlock(&init_mm.page_table_lock); + } + /* Count entries we're using from level2_ident_pgt */ + if (start == 0) + pages++; continue; } if (page_size_mask & (1<<PG_LEVEL_2M)) { pages++; + spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pmd, pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + spin_unlock(&init_mm.page_table_lock); last_map_addr = (address & PMD_MASK) + PMD_SIZE; continue; } @@ -386,7 +418,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, last_map_addr = phys_pte_init(pte, address, end); unmap_low_page(pte); + spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); + spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); return last_map_addr; @@ -399,9 +433,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, pmd_t *pmd = pmd_offset(pud, 0); unsigned long last_map_addr; - spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); - spin_unlock(&init_mm.page_table_lock); __flush_tlb_all(); return last_map_addr; } @@ -437,20 +469,21 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (page_size_mask & (1<<PG_LEVEL_1G)) { pages++; + spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pud, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + spin_unlock(&init_mm.page_table_lock); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; continue; } pmd = alloc_low_page(&pmd_phys); - - spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); unmap_low_page(pmd); + + spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, __va(pmd_phys)); spin_unlock(&init_mm.page_table_lock); - } __flush_tlb_all(); update_page_count(PG_LEVEL_1G, pages); @@ -542,16 +575,14 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, continue; } - if (after_bootmem) - pud = pud_offset(pgd, start & PGDIR_MASK); - else - pud = alloc_low_page(&pud_phys); - + pud = alloc_low_page(&pud_phys); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); unmap_low_page(pud); - pgd_populate(&init_mm, pgd_offset_k(start), - __va(pud_phys)); + + spin_lock(&init_mm.page_table_lock); + pgd_populate(&init_mm, pgd, __va(pud_phys)); + spin_unlock(&init_mm.page_table_lock); } return last_map_addr; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 016f335bbee..cac6da54203 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -170,7 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; - retval = reserve_memtype(phys_addr, phys_addr + size, + retval = reserve_memtype(phys_addr, (u64)phys_addr + size, prot_val, &new_prot_val); if (retval) { pr_debug("Warning: reserve_memtype returned %d\n", retval); @@ -421,7 +421,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -int __initdata early_ioremap_debug; +static int __initdata early_ioremap_debug; static int __init early_ioremap_debug_setup(char *str) { @@ -547,19 +547,17 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) } -int __initdata early_ioremap_nested; +static int __initdata early_ioremap_nested; static int __init check_early_ioremap_leak(void) { if (!early_ioremap_nested) return 0; - - printk(KERN_WARNING + WARN(1, KERN_WARNING "Debug warning: early ioremap leak of %d areas detected.\n", - early_ioremap_nested); + early_ioremap_nested); printk(KERN_WARNING - "please boot with early_ioremap_debug and report the dmesg.\n"); - WARN_ON(1); + "please boot with early_ioremap_debug and report the dmesg.\n"); return 1; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index e7397e108be..635b50e8558 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -430,7 +430,9 @@ static void enter_uniprocessor(void) "may miss events.\n"); } -static void leave_uniprocessor(void) +/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, + but this whole function is ifdefed CONFIG_HOTPLUG_CPU */ +static void __ref leave_uniprocessor(void) { int cpu; int err; diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 0dcd42eb94e..d4aa503caaa 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -221,8 +221,7 @@ static int pageattr_test(void) failed += print_split(&sc); if (failed) { - printk(KERN_ERR "NOT PASSED. Please report.\n"); - WARN_ON(1); + WARN(1, KERN_ERR "NOT PASSED. Please report.\n"); return -EINVAL; } else { if (print) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 65c6e46bf05..43e2f8483e4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -55,13 +55,19 @@ static void split_page_count(int level) int arch_report_meminfo(char *page) { - int n = sprintf(page, "DirectMap4k: %8lu\n" - "DirectMap2M: %8lu\n", - direct_pages_count[PG_LEVEL_4K], - direct_pages_count[PG_LEVEL_2M]); + int n = sprintf(page, "DirectMap4k: %8lu kB\n", + direct_pages_count[PG_LEVEL_4K] << 2); +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + n += sprintf(page + n, "DirectMap2M: %8lu kB\n", + direct_pages_count[PG_LEVEL_2M] << 11); +#else + n += sprintf(page + n, "DirectMap4M: %8lu kB\n", + direct_pages_count[PG_LEVEL_2M] << 12); +#endif #ifdef CONFIG_X86_64 - n += sprintf(page + n, "DirectMap1G: %8lu\n", - direct_pages_count[PG_LEVEL_1G]); + if (direct_gbpages) + n += sprintf(page + n, "DirectMap1G: %8lu kB\n", + direct_pages_count[PG_LEVEL_1G] << 20); #endif return n; } @@ -592,10 +598,9 @@ repeat: if (!pte_val(old_pte)) { if (!primary) return 0; - printk(KERN_WARNING "CPA: called for zero pte. " + WARN(1, KERN_WARNING "CPA: called for zero pte. " "vaddr = %lx cpa->vaddr = %lx\n", address, cpa->vaddr); - WARN_ON(1); return -EINVAL; } @@ -844,7 +849,7 @@ int set_memory_uc(unsigned long addr, int numpages) /* * for now UC MINUS. see comments in ioremap_nocache() */ - if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_UC_MINUS, NULL)) return -EINVAL; @@ -863,7 +868,7 @@ int set_memory_wc(unsigned long addr, int numpages) if (!pat_enabled) return set_memory_uc(addr, numpages); - if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_WC, NULL)) return -EINVAL; @@ -879,7 +884,7 @@ int _set_memory_wb(unsigned long addr, int numpages) int set_memory_wb(unsigned long addr, int numpages) { - free_memtype(addr, addr + numpages * PAGE_SIZE); + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return _set_memory_wb(addr, numpages); } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2fe30916d4b..2a50e0fa64a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -207,6 +207,9 @@ static int chk_conflict(struct memtype *new, struct memtype *entry, return -EBUSY; } +static struct memtype *cached_entry; +static u64 cached_start; + /* * req_type typically has one of the: * - _PAGE_CACHE_WB @@ -280,11 +283,17 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); + if (cached_entry && start >= cached_start) + entry = cached_entry; + else + entry = list_entry(&memtype_list, struct memtype, nd); + /* Search for existing mapping that overlaps the current range */ where = NULL; - list_for_each_entry(entry, &memtype_list, nd) { + list_for_each_entry_continue(entry, &memtype_list, nd) { if (end <= entry->start) { where = entry->nd.prev; + cached_entry = list_entry(where, struct memtype, nd); break; } else if (start <= entry->start) { /* end > entry->start */ err = chk_conflict(new, entry, new_type); @@ -292,6 +301,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); where = entry->nd.prev; + cached_entry = list_entry(where, + struct memtype, nd); } break; } else if (start < entry->end) { /* start > entry->start */ @@ -299,7 +310,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (!err) { dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); - where = &entry->nd; + cached_entry = list_entry(entry->nd.prev, + struct memtype, nd); + + /* + * Move to right position in the linked + * list to add this new entry + */ + list_for_each_entry_continue(entry, + &memtype_list, nd) { + if (start <= entry->start) { + where = entry->nd.prev; + break; + } + } } break; } @@ -314,6 +338,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return err; } + cached_start = start; + if (where) list_add(&new->nd, where); else @@ -343,6 +369,9 @@ int free_memtype(u64 start, u64 end) spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { if (entry->start == start && entry->end == end) { + if (cached_entry == entry || cached_start == start) + cached_entry = NULL; + list_del(&entry->nd); kfree(entry); err = 0; @@ -361,14 +390,6 @@ int free_memtype(u64 start, u64 end) } -/* - * /dev/mem mmap interface. The memtype used for mapping varies: - * - Use UC for mappings with O_SYNC flag - * - Without O_SYNC flag, if there is any conflict in reserve_memtype, - * inherit the memtype from existing mapping. - * - Else use UC_MINUS memtype (for backward compatibility with existing - * X drivers. - */ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -406,14 +427,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { u64 offset = ((u64) pfn) << PAGE_SHIFT; - unsigned long flags = _PAGE_CACHE_UC_MINUS; + unsigned long flags = -1; int retval; if (!range_is_allowed(pfn, size)) return 0; if (file->f_flags & O_SYNC) { - flags = _PAGE_CACHE_UC; + flags = _PAGE_CACHE_UC_MINUS; } #ifdef CONFIG_X86_32 @@ -436,13 +457,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, #endif /* - * With O_SYNC, we can only take UC mapping. Fail if we cannot. + * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. + * * Without O_SYNC, we want to get * - WB for WB-able memory and no other conflicting mappings * - UC_MINUS for non-WB-able memory with no other conflicting mappings * - Inherit from confliting mappings otherwise */ - if (flags != _PAGE_CACHE_UC_MINUS) { + if (flags != -1) { retval = reserve_memtype(offset, offset + size, flags, NULL); } else { retval = reserve_memtype(offset, offset + size, -1, &flags); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 557b2abceef..d50302774fe 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) unsigned long addr; int i; + if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ + return; + pud = pud_offset(pgd, 0); for (addr = i = 0; i < PREALLOCATED_PMDS; diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index b4becbf8c57..cab0abbd1eb 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -20,53 +20,6 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> -void show_mem(void) -{ - int total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - struct page *page; - pg_data_t *pgdat; - unsigned long i; - unsigned long flags; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); - for_each_online_pgdat(pgdat) { - pgdat_resize_lock(pgdat, &flags); - for (i = 0; i < pgdat->node_spanned_pages; ++i) { - if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) - touch_nmi_watchdog(); - page = pgdat_page_nr(pgdat, i); - total++; - if (PageHighMem(page)) - highmem++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - pgdat_resize_unlock(pgdat, &flags); - } - printk(KERN_INFO "%d pages of RAM\n", total); - printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); - printk(KERN_INFO "%d reserved pages\n", reserved); - printk(KERN_INFO "%d pages shared\n", shared); - printk(KERN_INFO "%d pages swap cached\n", cached); - - printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); - printk(KERN_INFO "%lu pages writeback\n", - global_page_state(NR_WRITEBACK)); - printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); - printk(KERN_INFO "%lu pages slab\n", - global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)); - printk(KERN_INFO "%lu pages pagetables\n", - global_page_state(NR_PAGETABLE)); -} - /* * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 1eb2973a301..16ae70fc57e 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void) * start of the node, and that the current "end" address is after * the previous one. */ -static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) +static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) { /* * Only add present memory as told by the e820. @@ -189,10 +189,10 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c if (memory_chunk->start_pfn >= max_pfn) { printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", memory_chunk->start_pfn, memory_chunk->end_pfn); - return; + return -1; } if (memory_chunk->nid != nid) - return; + return -1; if (!node_has_online_mem(nid)) node_start_pfn[nid] = memory_chunk->start_pfn; @@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c if (node_end_pfn[nid] < memory_chunk->end_pfn) node_end_pfn[nid] = memory_chunk->end_pfn; + + return 0; } int __init get_memcfg_from_srat(void) @@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void) printk(KERN_DEBUG "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", j, chunk->nid, chunk->start_pfn, chunk->end_pfn); - node_read_chunk(chunk->nid, chunk); + if (node_read_chunk(chunk->nid, chunk)) + continue; + e820_register_active_regions(chunk->nid, chunk->start_pfn, min(chunk->end_pfn, max_pfn)); } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3f90289410e..0227694f7da 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/moduleparam.h> #include <linux/kdebug.h> +#include <linux/cpu.h> #include <asm/nmi.h> #include <asm/msr.h> #include <asm/apic.h> @@ -28,23 +29,48 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); static int nmi_start(void); static void nmi_stop(void); +static void nmi_cpu_start(void *dummy); +static void nmi_cpu_stop(void *dummy); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; +#ifdef CONFIG_SMP +static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); + break; + case CPU_DOWN_PREPARE: + smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block oprofile_cpu_nb = { + .notifier_call = oprofile_cpu_notifier +}; +#endif + #ifdef CONFIG_PM static int nmi_suspend(struct sys_device *dev, pm_message_t state) { + /* Only one CPU left, just stop that one */ if (nmi_enabled == 1) - nmi_stop(); + nmi_cpu_stop(NULL); return 0; } static int nmi_resume(struct sys_device *dev) { if (nmi_enabled == 1) - nmi_start(); + nmi_cpu_start(NULL); return 0; } @@ -463,6 +489,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) } init_sysfs(); +#ifdef CONFIG_SMP + register_cpu_notifier(&oprofile_cpu_nb); +#endif using_nmi = 1; ops->create_files = nmi_create_files; ops->setup = nmi_setup; @@ -476,6 +505,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) void op_nmi_exit(void) { - if (using_nmi) + if (using_nmi) { exit_sysfs(); +#ifdef CONFIG_SMP + unregister_cpu_notifier(&oprofile_cpu_nb); +#endif + } } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 19af06927fb..1d88d2b3977 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -250,10 +250,5 @@ int __init pci_acpi_init(void) acpi_pci_irq_enable(dev); } -#ifdef CONFIG_X86_IO_APIC - if (acpi_ioapic) - print_IO_APIC(); -#endif - return 0; } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index dbf53236971..6a0fca78c36 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -1,6 +1,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/topology.h> +#include <linux/cpu.h> #include "pci.h" #ifdef CONFIG_X86_64 @@ -555,15 +556,17 @@ static int __init early_fill_mp_bus_info(void) return 0; } -postcore_initcall(early_fill_mp_bus_info); +#else /* !CONFIG_X86_64 */ -#endif +static int __init early_fill_mp_bus_info(void) { return 0; } + +#endif /* !CONFIG_X86_64 */ /* common 32/64 bit code */ #define ENABLE_CF8_EXT_CFG (1ULL << 46) -static void enable_pci_io_ecs_per_cpu(void *unused) +static void enable_pci_io_ecs(void *unused) { u64 reg; rdmsrl(MSR_AMD64_NB_CFG, reg); @@ -573,14 +576,51 @@ static void enable_pci_io_ecs_per_cpu(void *unused) } } -static int __init enable_pci_io_ecs(void) +static int __cpuinit amd_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { + int cpu = (long)hcpu; + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata amd_cpu_notifier = { + .notifier_call = amd_cpu_notify, +}; + +static int __init pci_io_ecs_init(void) +{ + int cpu; + /* assume all cpus from fam10h have IO ECS */ if (boot_cpu_data.x86 < 0x10) return 0; - on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1); + + register_cpu_notifier(&amd_cpu_notifier); + for_each_online_cpu(cpu) + amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, + (void *)(long)cpu); pci_probe |= PCI_HAS_IO_ECS; + + return 0; +} + +static int __init amd_postcore_init(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return 0; + + early_fill_mp_bus_info(); + pci_io_ecs_init(); + return 0; } -postcore_initcall(enable_pci_io_ecs); +postcore_initcall(amd_postcore_init); diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index ff3a6a33634..4bdaa590375 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) pci_read_config_byte(d, reg++, &busno); pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, + suba, subb); if (busno) pci_scan_bus_with_sysdata(busno); /* Bus A */ if (suba < subb) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a09505806b8..844df0cbbd3 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -33,6 +33,7 @@ #include <linux/bootmem.h> #include <asm/pat.h> +#include <asm/e820.h> #include "pci.h" @@ -128,10 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of bridge %s\n", - idx, pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -166,15 +164,13 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - DBG("PCI: Resource %08lx-%08lx " - "(f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); + dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", + (unsigned long long) r->start, + (unsigned long long) r->end, + r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of device %s\n", - idx, pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -187,8 +183,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - DBG("PCI: Switching off ROM of %s\n", - pci_name(dev)); + dev_dbg(&dev->dev, "disabling ROM\n"); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); @@ -233,6 +228,8 @@ void __init pcibios_resource_survey(void) pcibios_allocate_bus_resources(&pci_root_buses); pcibios_allocate_resources(0); pcibios_allocate_resources(1); + + e820_reserve_resources_late(); } /** @@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev) lat = pcibios_max_latency; else return; - printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); + dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 6a06a2eb059..8e077185e18 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); return 0; } return read_config_nybble(router, 0x74, pirq-1); @@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); return 0; } write_config_nybble(router, 0x74, pirq-1, irq); @@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq irq = 0; if (pirq <= 4) irq = read_config_nybble(router, 0x56, pirq - 1); - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", - dev->vendor, dev->device, pirq, irq); + dev_info(&dev->dev, + "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", + dev->vendor, dev->device, pirq, irq); return irq; } static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", - dev->vendor, dev->device, pirq, irq); + dev_info(&dev->dev, + "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", + dev->vendor, dev->device, pirq, irq); if (pirq <= 4) write_config_nybble(router, 0x56, pirq - 1, irq); return 1; @@ -588,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: + case PCI_DEVICE_ID_INTEL_PCH_0: + case PCI_DEVICE_ID_INTEL_PCH_1: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; @@ -730,7 +734,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, switch (device) { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: - printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); r->name = "ALI"; r->get = pirq_ali_get; r->set = pirq_ali_set; @@ -840,11 +843,9 @@ static void __init pirq_find_router(struct irq_router *r) h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", - pirq_router.name, - pirq_router_dev->vendor, - pirq_router_dev->device, - pci_name(pirq_router_dev)); + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", + pirq_router.name, + pirq_router_dev->vendor, pirq_router_dev->device); /* The device remains referenced for the kernel lifetime */ } @@ -877,7 +878,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) /* Find IRQ pin */ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) { - DBG(KERN_DEBUG " -> no interrupt pin\n"); + dev_dbg(&dev->dev, "no interrupt pin\n"); return 0; } pin = pin - 1; @@ -887,20 +888,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { - DBG(" -> not found in routing table\n" KERN_DEBUG); + dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", + 'A' + pin); return 0; } pirq = info->irq[pin].link; mask = info->irq[pin].bitmap; if (!pirq) { - DBG(" -> not routed\n" KERN_DEBUG); + dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); return 0; } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, - pirq_table->exclusive_irqs); + dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", + 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); mask &= pcibios_irq_mask; /* Work around broken HP Pavilion Notebooks which assign USB to @@ -930,10 +931,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; else - printk("\n" KERN_WARNING - "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" - KERN_DEBUG, newirq, - pci_name(dev)); + dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " + "%#x; try pci=usepirqmask\n", newirq, mask); } if (!newirq && assign) { for (i = 0; i < 16; i++) { @@ -944,39 +943,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) newirq = i; } } - DBG(" -> newirq=%d", newirq); + dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); /* Check if it is hardcoded */ if ((pirq & 0xf0) == 0xf0) { irq = pirq & 0xf; - DBG(" -> hardcoded IRQ %d\n", irq); - msg = "Hardcoded"; + msg = "hardcoded"; } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { - DBG(" -> got IRQ %d\n", irq); - msg = "Found"; + msg = "found"; eisa_set_level_irq(irq); } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { - DBG(" -> assigning IRQ %d", newirq); if (r->set(pirq_router_dev, dev, pirq, newirq)) { eisa_set_level_irq(newirq); - DBG(" ... OK\n"); - msg = "Assigned"; + msg = "assigned"; irq = newirq; } } if (!irq) { - DBG(" ... failed\n"); if (newirq && mask == (1 << newirq)) { - msg = "Guessed"; + msg = "guessed"; irq = newirq; - } else + } else { + dev_dbg(&dev->dev, "can't route interrupt\n"); return 0; + } } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, - pci_name(dev)); + dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); /* Update IRQ for all devices with the same pirq value */ while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { @@ -996,17 +991,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) (!(pci_probe & PCI_USE_PIRQ_MASK) || \ ((1 << dev2->irq) & mask))) { #ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - pci_name(dev2), dev2->irq, irq); + dev_info(&dev2->dev, "IRQ routing conflict: " + "have IRQ %d, want IRQ %d\n", + dev2->irq, irq); #endif continue; } dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) - printk(KERN_INFO - "PCI: Sharing IRQ %d with %s\n", - irq, pci_name(dev2)); + dev_info(&dev->dev, "sharing IRQ %d with %s\n", + irq, pci_name(dev2)); } } return 1; @@ -1025,8 +1020,7 @@ static void __init pcibios_fixup_irqs(void) * already in use. */ if (dev->irq >= 16) { - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", - pci_name(dev), dev->irq); + dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); dev->irq = 0; } /* @@ -1070,12 +1064,12 @@ static void __init pcibios_fixup_irqs(void) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); + dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", + pci_name(bridge), + 'A' + pin, irq); } if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); + dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); dev->irq = irq; } } @@ -1231,25 +1225,24 @@ static int pirq_enable_irq(struct pci_dev *dev) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING - "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), - 'A' + pin, irq); + dev_warn(&dev->dev, "using bridge %s " + "INT %c to get IRQ %d\n", + pci_name(bridge), 'A' + pin, + irq); dev = bridge; } dev = temp_dev; if (irq >= 0) { - printk(KERN_INFO - "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); + dev_info(&dev->dev, "PCI->APIC IRQ transform: " + "INT %c -> IRQ %d\n", 'A' + pin, irq); dev->irq = irq; return 0; } else - msg = " Probably buggy MP table."; + msg = "; probably buggy MP table"; } else if (pci_probe & PCI_BIOS_IRQ_SCAN) msg = ""; else - msg = " Please try using pci=biosirq."; + msg = "; please try using pci=biosirq"; /* * With IDE legacy devices the IRQ lookup failure is not @@ -1259,9 +1252,8 @@ static int pirq_enable_irq(struct pci_dev *dev) !(dev->class & 0x5)) return 0; - printk(KERN_WARNING - "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin, pci_name(dev), msg); + dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", + 'A' + pin, msg); } return 0; } diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index ec9ce35e44d..b722dd481b3 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -14,7 +14,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) int n, devfn; long node; - if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) + if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff) return; DBG("PCI: Peer bridge fixup\n"); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 23faaa890ff..654a2234f8f 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -209,7 +209,7 @@ static int __init pci_mmcfg_check_hostbridge(void) return name != NULL; } -static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) +static void __init pci_mmcfg_insert_resources(void) { #define PCI_MMCFG_RESOURCE_NAME_LEN 19 int i; @@ -233,7 +233,7 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) cfg->pci_segment); res->start = cfg->address; res->end = res->start + (num_buses << 20) - 1; - res->flags = IORESOURCE_MEM | resource_flags; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; insert_resource(&iomem_resource, res); names += PCI_MMCFG_RESOURCE_NAME_LEN; } @@ -293,7 +293,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, return AE_OK; } -static int __init is_acpi_reserved(unsigned long start, unsigned long end) +static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) { struct resource mcfg_res; @@ -310,6 +310,41 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end) return mcfg_res.flags; } +typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); + +static int __init is_mmconf_reserved(check_reserved_t is_reserved, + u64 addr, u64 size, int i, + typeof(pci_mmcfg_config[0]) *cfg, int with_e820) +{ + u64 old_size = size; + int valid = 0; + + while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) { + size >>= 1; + if (size < (16UL<<20)) + break; + } + + if (size >= (16UL<<20) || size == old_size) { + printk(KERN_NOTICE + "PCI: MCFG area at %Lx reserved in %s\n", + addr, with_e820?"E820":"ACPI motherboard resources"); + valid = 1; + + if (old_size != size) { + /* update end_bus_number */ + cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); + printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " + "segment %hu buses %u - %u\n", + i, (unsigned long)cfg->address, cfg->pci_segment, + (unsigned int)cfg->start_bus_number, + (unsigned int)cfg->end_bus_number); + } + } + + return valid; +} + static void __init pci_mmcfg_reject_broken(int early) { typeof(pci_mmcfg_config[0]) *cfg; @@ -324,21 +359,22 @@ static void __init pci_mmcfg_reject_broken(int early) for (i = 0; i < pci_mmcfg_config_num; i++) { int valid = 0; - u32 size = (cfg->end_bus_number + 1) << 20; + u64 addr, size; + cfg = &pci_mmcfg_config[i]; + addr = cfg->start_bus_number; + addr <<= 20; + addr += cfg->address; + size = cfg->end_bus_number + 1 - cfg->start_bus_number; + size <<= 20; printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " "segment %hu buses %u - %u\n", i, (unsigned long)cfg->address, cfg->pci_segment, (unsigned int)cfg->start_bus_number, (unsigned int)cfg->end_bus_number); - if (!early && - is_acpi_reserved(cfg->address, cfg->address + size - 1)) { - printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " - "in ACPI motherboard resources\n", - cfg->address); - valid = 1; - } + if (!early) + valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); if (valid) continue; @@ -347,16 +383,11 @@ static void __init pci_mmcfg_reject_broken(int early) printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" " reserved in ACPI motherboard resources\n", cfg->address); + /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ - if (raw_pci_ops && e820_all_mapped(cfg->address, - cfg->address + size - 1, - E820_RESERVED)) { - printk(KERN_NOTICE - "PCI: MCFG area at %Lx reserved in E820\n", - cfg->address); - valid = 1; - } + if (raw_pci_ops) + valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); if (!valid) goto reject; @@ -365,7 +396,7 @@ static void __init pci_mmcfg_reject_broken(int early) return; reject: - printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); pci_mmcfg_arch_free(); kfree(pci_mmcfg_config); pci_mmcfg_config = NULL; @@ -403,11 +434,9 @@ static void __init __pci_mmcfg_init(int early) (pci_mmcfg_config[0].address == 0)) return; - if (pci_mmcfg_arch_init()) { - if (known_bridge) - pci_mmcfg_insert_resources(IORESOURCE_BUSY); + if (pci_mmcfg_arch_init()) pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; - } else { + else { /* * Signal not to attempt to insert mmcfg resources because * the architecture mmcfg setup could not initialize. @@ -444,7 +473,7 @@ static int __init pci_mmcfg_late_insert_resources(void) * marked so it won't cause request errors when __request_region is * called. */ - pci_mmcfg_insert_resources(0); + pci_mmcfg_insert_resources(); return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index f4b16dc11da..1177845d318 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) u8 busno, suba, subb; int quad = BUS2QUAD(d->bus->number); - printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); + dev_info(&d->dev, "searching for i450NX host bridges\n"); reg = 0xd0; for(pxb=0; pxb<2; pxb++) { pci_read_config_byte(d, reg++, &busno); pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", + pxb, busno, suba, subb); if (busno) { /* Bus A */ pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 7dc5d5cf50a..274d06082f4 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -11,6 +11,7 @@ #include <linux/suspend.h> #include <asm/mtrr.h> #include <asm/mce.h> +#include <asm/xcr.h> static struct saved_context saved_context; @@ -45,7 +46,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr0 = read_cr0(); ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); - ctxt->cr4 = read_cr4(); + ctxt->cr4 = read_cr4_safe(); } /* Needed by apm.c */ @@ -98,7 +99,9 @@ static void __restore_processor_state(struct saved_context *ctxt) /* * control registers */ - write_cr4(ctxt->cr4); + /* cr4 was introduced in the Pentium CPU */ + if (ctxt->cr4) + write_cr4(ctxt->cr4); write_cr3(ctxt->cr3); write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); @@ -124,6 +127,12 @@ static void __restore_processor_state(struct saved_context *ctxt) if (boot_cpu_has(X86_FEATURE_SEP)) enable_sep_cpu(); + /* + * restore XCR0 for xsave capable cpu's. + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + fix_processor_context(); do_fpu_end(); mtrr_ap_init(); diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 66bdfb591fd..e3b6cf70d62 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mtrr.h> +#include <asm/xcr.h> static void fix_processor_context(void); @@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + /* + * restore XCR0 for xsave capable cpu's. + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + fix_processor_context(); do_fpu_end(); diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index b95aa6cfe3c..4fc7e872c85 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -28,9 +28,9 @@ ENTRY(swsusp_arch_suspend) ret ENTRY(restore_image) - movl resume_pg_dir, %ecx - subl $__PAGE_OFFSET, %ecx - movl %ecx, %cr3 + movl resume_pg_dir, %eax + subl $__PAGE_OFFSET, %eax + movl %eax, %cr3 movl restore_pblist, %edx .p2align 4,,7 @@ -52,17 +52,21 @@ copy_loop: done: /* go back to the original page tables */ - movl $swapper_pg_dir, %ecx - subl $__PAGE_OFFSET, %ecx - movl %ecx, %cr3 + movl $swapper_pg_dir, %eax + subl $__PAGE_OFFSET, %eax + movl %eax, %cr3 /* Flush TLB, including "global" things (vmalloc) */ - movl mmu_cr4_features, %eax - movl %eax, %edx + movl mmu_cr4_features, %ecx + jecxz 1f # cr4 Pentium and higher, skip if zero + movl %ecx, %edx andl $~(1<<7), %edx; # PGE movl %edx, %cr4; # turn off PGE - movl %cr3, %ecx; # flush TLB - movl %ecx, %cr3 - movl %eax, %cr4; # turn PGE back on +1: + movl %cr3, %eax; # flush TLB + movl %eax, %cr3 + jecxz 1f # cr4 Pentium and higher, skip if zero + movl %ecx, %cr4; # turn PGE back on +1: movl saved_context_esp, %esp movl saved_context_ebp, %ebp diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9ff6e3cbf08..8d28925ebed 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -36,6 +36,7 @@ #include <xen/hvc-console.h> #include <asm/paravirt.h> +#include <asm/apic.h> #include <asm/page.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -580,16 +581,47 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC -static u32 xen_apic_read(unsigned long reg) +static u32 xen_apic_read(u32 reg) { return 0; } -static void xen_apic_write(unsigned long reg, u32 val) +static void xen_apic_write(u32 reg, u32 val) { /* Warn to see if there's any stray references */ WARN_ON(1); } + +static u64 xen_apic_icr_read(void) +{ + return 0; +} + +static void xen_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static void xen_apic_wait_icr_idle(void) +{ + return; +} + +static u32 xen_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static struct apic_ops xen_basic_apic_ops = { + .read = xen_apic_read, + .write = xen_apic_write, + .icr_read = xen_apic_icr_read, + .icr_write = xen_apic_icr_write, + .wait_icr_idle = xen_apic_wait_icr_idle, + .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, +}; + #endif static void xen_flush_tlb(void) @@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = xen_apic_write, - .apic_read = xen_apic_read, .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, @@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; +#ifdef CONFIG_X86_LOCAL_APIC + /* + * set up the basic apic ops. + */ + apic_ops = &xen_basic_apic_ops; +#endif + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; |