From c5cca146aa03e1f60fb179df65f0dbaf17bc64ed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 9 Oct 2009 18:31:20 +0200 Subject: x86/amd-iommu: Workaround for erratum 63 There is an erratum for IOMMU hardware which documents undefined behavior when forwarding SMI requests from peripherals and the DTE of that peripheral has a sysmgt value of 01b. This problem caused weird IO_PAGE_FAULTS in my case. This patch implements the suggested workaround for that erratum into the AMD IOMMU driver. The erratum is documented with number 63. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/kernel/amd_iommu.c | 2 ++ arch/x86/kernel/amd_iommu_init.c | 22 ++++++++++++++++++++++ 3 files changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f0..9dbd4030f0c 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -30,6 +30,7 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_shutdown(void); +extern void amd_iommu_apply_erratum_63(u16 devid); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f503780..f95dfe52644 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1114,6 +1114,8 @@ static void __detach_device(struct protection_domain *domain, u16 devid) amd_iommu_dev_table[devid].data[1] = 0; amd_iommu_dev_table[devid].data[2] = 0; + amd_iommu_apply_erratum_63(devid); + /* decrease reference counter */ domain->dev_cnt -= 1; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252..498c8c79aaa 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -509,6 +509,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit) amd_iommu_dev_table[devid].data[i] |= (1 << _bit); } +static int get_dev_entry_bit(u16 devid, u8 bit) +{ + int i = (bit >> 5) & 0x07; + int _bit = bit & 0x1f; + + return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; +} + + +void amd_iommu_apply_erratum_63(u16 devid) +{ + int sysmgt; + + sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(devid, DEV_ENTRY_IW); +} + /* Writes the specific IOMMU for a device into the rlookup table */ static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) { @@ -537,6 +557,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, if (flags & ACPI_DEVFLAG_LINT1) set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + amd_iommu_apply_erratum_63(devid); + set_iommu_for_device(iommu, devid); } -- cgit v1.2.3 From 14a3f40aafacde1dfd6912327ae14df4baf10304 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 23 Oct 2009 07:31:01 -0700 Subject: x86: Remove STACKPROTECTOR_ALL STACKPROTECTOR_ALL has a really high overhead (runtime and stack footprint) and is not really worth it protection wise (the normal STACKPROTECTOR is in effect for all functions with buffers already), so lets just remove the option entirely. 
Reported-by: Dave Jones Reported-by: Chuck Ebbert Signed-off-by: Arjan van de Ven Cc: Eric Sandeen LKML-Reference: <20091023073101.3dce4ebb@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 1 - 2 files changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 07e01149e3b..72ace9515a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1443,12 +1443,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. -config CC_STACKPROTECTOR_ALL - bool - config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - select CC_STACKPROTECTOR_ALL ---help--- This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a012ee8ef80..d2d24c9ee64 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) stackp-y := -fstack-protector - stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all KBUILD_CFLAGS += $(stackp-y) else $(warning stack protector enabled but no compiler support) -- cgit v1.2.3 From ae1b22f6e46c03cede7cea234d0bf2253b4261cf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 26 Oct 2009 14:26:04 +1030 Subject: x86: Side-step lguest problem by only building cmpxchg8b_emu for pre-Pentium Commit 79e1dd05d1a22 "x86: Provide an alternative() based cmpxchg64()" broke lguest, even on systems which have cmpxchg8b support. The emulation code gets used until alternatives get run, but it contains native instructions, not their paravirt alternatives. The simplest fix is to turn this code off except for 386 and 486 builds. Reported-by: Johannes Stezenbach Signed-off-by: Rusty Russell Acked-by: H. Peter Anvin Cc: lguest@ozlabs.org Cc: Arjan van de Ven Cc: Jeremy Fitzhardinge Cc: Linus Torvalds LKML-Reference: <200910261426.05769.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index f2824fb8c79..2649840d888 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -400,7 +400,7 @@ config X86_TSC config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM + depends on !M386 && !M486 # this should be set for all -march=.. options where the compiler # generates cmov. -- cgit v1.2.3 From 72ed7de74e8f0fad0d8e567ae1f987b740accb3f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 26 Oct 2009 11:11:43 +0100 Subject: x86: crash_dump: Fix non-pae kdump kernel memory accesses Non-PAE 32-bit dump kernels may wrap an address around 4G and poke unwanted space. ptes there are 32-bit long, and since pfn << PAGE_SIZE may exceed this limit, high pfn bits are cropped and wrong address mapped by kmap_atomic_pfn in copy_oldmem_page. Don't allow this behavior in non-PAE kdump kernels by checking pfns passed into copy_oldmem_page. In the case of failure, userspace process gets EFAULT. [v2] - fix comments - move ifdefs inside the function Signed-off-by: Jiri Slaby Cc: Vivek Goyal Cc: Eric W. 
Biederman Cc: Simon Horman Cc: Paul Mundt LKML-Reference: <1256551903-30567-1-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_32.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index f7cdb3b457a..cd97ce18c29 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -16,6 +16,22 @@ static void *kdump_buf_page; /* Stores the physical address of elf header of crash image. */ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; +static inline bool is_crashed_pfn_valid(unsigned long pfn) +{ +#ifndef CONFIG_X86_PAE + /* + * non-PAE kdump kernel executed from a PAE one will crop high pte + * bits and poke unwanted space counting again from address 0, we + * don't want that. pte must fit into unsigned long. In fact the + * test checks high 12 bits for being zero (pfn will be shifted left + * by PAGE_SHIFT). + */ + return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn; +#else + return true; +#endif +} + /** * copy_oldmem_page - copy one page from "oldmem" * @pfn: page frame number to be copied @@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; + if (!is_crashed_pfn_valid(pfn)) + return -EFAULT; + vaddr = kmap_atomic_pfn(pfn, KM_PTE0); if (!userbuf) { -- cgit v1.2.3 From 81766741fe1eee3884219e8daaf03f466f2ed52f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Oct 2009 15:20:29 +0000 Subject: x86-64: Fix register leak in 32-bit syscall audting Restoring %ebp after the call to audit_syscall_exit() is not only unnecessary (because the register didn't get clobbered), but in the sysenter case wasn't even doing the right thing: It loaded %ebp from a location below the top of stack (RBP < ARGOFFSET), i.e. arbitrary kernel data got passed back to user mode in the register. Signed-off-by: Jan Beulich Acked-by: Roland McGrath Cc: LKML-Reference: <4AE5CC4D020000780001BD13@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 1733f9f65e8..581b0568fe1 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -204,7 +204,7 @@ sysexit_from_sys_call: movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ .endm - .macro auditsys_exit exit,ebpsave=RBP + .macro auditsys_exit exit testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) jnz ia32_ret_from_sys_call TRACE_IRQS_ON @@ -217,7 +217,6 @@ sysexit_from_sys_call: call audit_syscall_exit GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ - movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF @@ -351,7 +350,7 @@ cstar_auditsys: jmp cstar_dispatch sysretl_audit: - auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ + auditsys_exit sysretl_from_sys_call #endif cstar_tracesys: -- cgit v1.2.3 From 772be899bc022ef2b911c3611b487d417e3269c3 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 20 Oct 2009 12:54:02 +0800 Subject: x86: Make EFI RTC function depend on 32bit again The EFI RTC functions are only available on 32 bit. commit 7bd867df (x86: Move get/set_wallclock to x86_platform_ops) removed the 32bit dependency which leads to boot crashes on 64bit EFI systems. Add the dependency back. 
Solves: http://bugzilla.kernel.org/show_bug.cgi?id=14466 Tested-by: Matthew Garrett Signed-off-by: Feng Tang LKML-Reference: <20091020125402.028d66d5@feng-desktop> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index ad5bd988fb7..cdcfb122f25 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -454,8 +454,10 @@ void __init efi_init(void) if (add_efi_memmap) do_add_efi_memmap(); +#ifdef CONFIG_X86_32 x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; +#endif /* Setup for EFI runtime service */ reboot_type = BOOT_EFI; -- cgit v1.2.3 From ca0207114f1708b563f510b7781a360ec5b98359 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 28 Oct 2009 18:02:26 +0100 Subject: x86/amd-iommu: Un__init function required on shutdown The function iommu_feature_disable is required on system shutdown to disable the IOMMU but it is marked as __init. This may result in a panic if the memory is reused. This patch fixes this bug. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 498c8c79aaa..1e423b2add1 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -240,7 +240,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; -- cgit v1.2.3 From 16121d70fdf9eeb05ead46b241a293156323dbbe Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 1 Nov 2009 19:27:05 -0500 Subject: x86: Fix printk message typo in mtrr cleanup code Trivial typo. Signed-off-by: Dave Jones LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/cleanup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 315738c74aa..73c86db5acb 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -846,7 +846,7 @@ int __init mtrr_cleanup(unsigned address_bits) sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM coverred: %ldM\n", + printk(KERN_INFO "total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { -- cgit v1.2.3 From 05154752cf3767c544b65b5e340793d40b3f1229 Mon Sep 17 00:00:00 2001 From: Gottfried Haider Date: Mon, 2 Nov 2009 11:51:11 +0100 Subject: x86: Add reboot quirk for 3 series Mac mini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reboot does not work out of the box on my "Early 2009" Mac mini (3,1). Detect this machine via DMI as we do for recent MacBooks. 
Signed-off-by: Gottfried Haider Cc: Ozan Çağlayan Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a1a3cdda06e..f93078746e0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -436,6 +436,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), }, }, + { /* Handle problems with rebooting on Apple Macmini3,1 */ + .callback = set_pci_reboot, + .ident = "Apple Macmini3,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), + }, + }, { } }; -- cgit v1.2.3 From 6b9de613ae9c79b637e070136585dde029578065 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 2 Nov 2009 20:36:51 +0100 Subject: sched: Disable SD_PREFER_LOCAL at node level Yanmin Zhang reported that SD_PREFER_LOCAL induces an order of magnitude increase in select_task_rq_fair() overhead while running heavy wakeup benchmarks (tbench and vmark). Since SD_BALANCE_WAKE is off at node level, turn SD_PREFER_LOCAL off as well pending further investigation. Reported-by: Zhang, Yanmin Signed-off-by: Mike Galbraith Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d823c245f63..40e37b10c6c 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,7 @@ extern unsigned long node_remap_size[]; | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 1*SD_WAKE_AFFINE \ - | 1*SD_PREFER_LOCAL \ + | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ | 0*SD_SHARE_PKG_RESOURCES \ -- cgit v1.2.3 From 82d6469916c6fcfa345636a49004c9d1753905d1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Oct 2009 16:41:15 -0700 Subject: xen: mask extended topology info in cpuid A Xen guest never needs to know about extended topology, and knowing would just confuse it. This patch just zeros ebx in leaf 0xb which indicates no topology info, preventing a crash under Xen on cpus which support this leaf. Signed-off-by: Jeremy Fitzhardinge Cc: Stable Kernel --- arch/x86/xen/enlighten.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 23a4d80fb39..dfbf70e6586 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -178,6 +178,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskebx = ~0; unsigned maskecx = ~0; unsigned maskedx = ~0; @@ -185,9 +186,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. 
*/ - if (*ax == 1) { + switch (*ax) { + case 1: maskecx = cpuid_leaf1_ecx_mask; maskedx = cpuid_leaf1_edx_mask; + break; + + case 0xb: + /* Suppress extended topology stuff */ + maskebx = 0; + break; } asm(XEN_EMULATE_PREFIX "cpuid" @@ -197,6 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=d" (*dx) : "0" (*ax), "2" (*cx)); + *bx &= maskebx; *cx &= maskecx; *dx &= maskedx; } -- cgit v1.2.3 From 89240ba059ca468ae7a8346edf7f95082458c2fc Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Tue, 3 Nov 2009 10:22:40 +0100 Subject: x86, fs: Fix x86 procfs stack information for threads on 64-bit This patch fixes two issues in the procfs stack information on x86-64 linux. The 32 bit loader compat_do_execve did not store stack start. (this was figured out by Alexey Dobriyan). The stack information on a x64_64 kernel always shows 0 kbyte stack usage, because of a missing implementation of the KSTK_ESP macro which always returned -1. The new implementation now returns the right value. Signed-off-by: Stefani Seibold Cc: Americo Wang Cc: Alexey Dobriyan Cc: Al Viro Cc: Andrew Morton LKML-Reference: <1257240160.4889.24.camel@wall-e> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process_64.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c3429e8b242..c9786480f0f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) -#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */ +extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b68317..eb62cbcaa49 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -664,3 +664,8 @@ long sys_arch_prctl(int code, unsigned long addr) return do_arch_prctl(current, code, addr); } +unsigned long KSTK_ESP(struct task_struct *task) +{ + return (test_tsk_thread_flag(task, TIF_IA32)) ? + (task_pt_regs(task)->sp) : ((task)->thread.usersp); +} -- cgit v1.2.3 From a9e38c3e01ad242fe2a625354cf065c34b01e3aa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 23 Oct 2009 09:37:00 +0200 Subject: KVM: x86: Catch potential overrun in MCE setup We only allocate memory for 32 MCE banks (KVM_MAX_MCE_BANKS) but we allow user space to fill up to 255 on setup (mcg_cap & 0xff), corrupting kernel memory. Catch these overflows. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b9695322f5..8a93fa894ba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1692,7 +1692,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num) + if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) goto out; -- cgit v1.2.3 From abb3911965c1bd8eea305f64d4840a314259d96d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 25 Oct 2009 17:42:02 +0200 Subject: KVM: get_tss_base_addr() should return a gpa_t If TSS we are switching to resides in high memory task switch will fail since address will be truncated. Windows2k3 does this sometimes when running with more then 4G Cc: stable@kernel.org Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a93fa894ba..ae07d261527 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4051,7 +4051,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); } -static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, +static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, struct desc_struct *seg_desc) { u32 base_addr = get_desc_base(seg_desc); -- cgit v1.2.3 From 2c75910f1aa042be1dd769378d2611bf551721ac Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Thu, 5 Nov 2009 11:47:08 +0100 Subject: x86: Make sure get_user_desc() doesn't sign extend. The current implementation of get_user_desc() sign extends the return value because of integer promotion rules. For the most part, this doesn't matter, because the top bit of base2 is usually 0. If, however, that bit is 1, then the entire value will be 0xffff... which is probably not what the caller intended. This patch casts the entire thing to unsigned before returning, which generates almost the same assembly as the current code but replaces the final "cltq" (sign extend) with a "mov %eax %eax" (zero-extend). This fixes booting certain guests under KVM. Signed-off-by: Chris Lalancette Signed-off-by: Linus Torvalds --- arch/x86/include/asm/desc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e8de2f6f5ca..617bd56b307 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc) static inline unsigned long get_desc_base(const struct desc_struct *desc) { - return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); + return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); } static inline void set_desc_base(struct desc_struct *desc, unsigned long base) -- cgit v1.2.3 From f1b291d4c47440cbfc1a478e88800e2742d60a80 Mon Sep 17 00:00:00 2001 From: Simon Kagstrom Date: Fri, 6 Nov 2009 15:44:04 +0100 Subject: x86: Add Phoenix/MSC BIOSes to lowmem corruption list We have a board with a Phoenix/MSC BIOS which also corrupts the low 64KB of RAM, so add an entry to the table. 
Signed-off-by: Simon Kagstrom LKML-Reference: <20091106154404.002648d9@marrow.netinsight.se> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2c14b..2a34f9c5be2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -659,6 +659,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix/MSC BIOS", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), + }, + }, { /* * AMI BIOS with low memory corruption was found on Intel DG45ID board. -- cgit v1.2.3 From de2a47cf2b3f59ef9664b277f4021b91af13598e Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 5 Nov 2009 10:43:51 +0800 Subject: x86: Fix error return sequence in __ioremap_caller() kernel missed to free memtype if get_vm_area_caller failed in __ioremap_caller. This patch introduces error path to fix this and cleans up the repetitive error return sequences that contributed to the creation of the bug. Signed-off-by: Xiaotian Feng Acked-by: Suresh Siddha Cc: Venkatesh Pallipadi Cc: H. Peter Anvin LKML-Reference: <1257389031-20429-1-git-send-email-dfeng@redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 334e63ca7b2..2feb9bdedaa 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), prot_val, new_prot_val); - free_memtype(phys_addr, phys_addr + size); - return NULL; + goto err_free_memtype; } prot_val = new_prot_val; } @@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, */ area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) - return NULL; + goto err_free_memtype; area->phys_addr = phys_addr; vaddr = (unsigned long) area->addr; - if (kernel_map_sync_memtype(phys_addr, size, prot_val)) { - free_memtype(phys_addr, phys_addr + size); - free_vm_area(area); - return NULL; - } + if (kernel_map_sync_memtype(phys_addr, size, prot_val)) + goto err_free_area; - if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { - free_memtype(phys_addr, phys_addr + size); - free_vm_area(area); - return NULL; - } + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) + goto err_free_area; ret_addr = (void __iomem *) (vaddr + offset); mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); return ret_addr; +err_free_area: + free_vm_area(area); +err_free_memtype: + free_memtype(phys_addr, phys_addr + size); + return NULL; } /** -- cgit v1.2.3 From eb647138acefc897c0eb6eddd5d3650966dfe627 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 8 Nov 2009 12:12:14 +0100 Subject: x86/PCI: Adjust GFP mask handling for coherent allocations Rather than forcing GFP flags and DMA mask to be inconsistent, GFP flags should be determined even for the fallback device through dma_alloc_coherent_mask()/dma_alloc_coherent_gfp_flags(). 
This restores 64-bit behavior as it was prior to commits 8965eb19386fdf5ccd0ef8b02593eb8560aa3416 and 4a367f3a9dbf2e7ffcee4702203479809236ee6e (not sure why there are two of them), where GFP_DMA was forced on for 32-bit, but not for 64-bit, with the slight adjustment that afaict even 32-bit doesn't need this without CONFIG_ISA. Signed-off-by: Jan Beulich Acked-by: Takashi Iwai LKML-Reference: <4AF18187020000780001D8AA@vpn.id2.novell.com> Signed-off-by: Ingo Molnar Signed-off-by: Jesse Barnes --- arch/x86/include/asm/dma-mapping.h | 10 +++++++--- arch/x86/kernel/pci-dma.c | 6 ++---- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0ee770d23d0..6a25d5d4283 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -14,6 +14,12 @@ #include #include +#ifdef CONFIG_ISA +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) +#else +# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + extern dma_addr_t bad_dma_address; extern int iommu_merge; extern struct device x86_dma_fallback_dev; @@ -124,10 +130,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; - if (!dev) { + if (!dev) dev = &x86_dma_fallback_dev; - gfp |= GFP_DMA; - } if (!is_device_dma_capable(dev)) return NULL; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b2a71dca564..a6e804d16c3 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -45,12 +45,10 @@ int iommu_pass_through __read_mostly; dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to older i386. */ +/* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", - .coherent_dma_mask = DMA_BIT_MASK(32), + .coherent_dma_mask = ISA_DMA_BIT_MASK, .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; EXPORT_SYMBOL(x86_dma_fallback_dev); -- cgit v1.2.3 From 506f90eeae682dc96c11c7aefac0262b3a560b49 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 29 Oct 2009 14:45:52 +0100 Subject: x86, amd-ucode: Check UCODE_MAGIC before loading the container file Signed-off-by: Borislav Petkov Signed-off-by: Andreas Herrmann LKML-Reference: <20091029134552.GC30802@alberich.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 366baa17991..f4c538b681c 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -317,6 +317,12 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; } + if (*(u32 *)firmware->data != UCODE_MAGIC) { + printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", + *(u32 *)firmware->data); + return UCODE_ERROR; + } + ret = generic_load_microcode(cpu, firmware->data, firmware->size); release_firmware(firmware); -- cgit v1.2.3 From f7f3cad06080f14f60b1453af94463ff68ea2739 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sat, 24 Oct 2009 17:25:38 +0200 Subject: [CPUFREQ] longhaul: select Longhaul version 2 for capable CPUs There is a typo in the longhaul detection code so only Longhaul v1 or Longhaul v3 is selected. 
The Longhaul v2 is not selected even for CPUs which are capable of. Tested on PCChips Giga Pro board. Frequency changes work and the Longhaul v2 detects that the board is not capable of changing CPU voltage. Signed-off-by: Krzysztof Helt Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index ce2ed3e4aad..cabd2fa3fc9 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr)); break; case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V1; + longhaul_version = TYPE_LONGHAUL_V2; if (c->x86_mask < 8) { cpu_model = CPU_SAMUEL2; cpuname = "C3 'Samuel 2' [C5B]"; -- cgit v1.2.3 From c53614ec17fe6296a696aa4ac71a799814bb50c1 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 6 Oct 2009 17:36:53 +0200 Subject: [CPUFREQ] powernow-k8: Fix test in get_transition_latency() Not makes it a bool before the comparison. Signed-off-by: Roel Kluin Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 6394aa5c798..3f12dabeab5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data) * set it to 1 to avoid problems in the future. * For all others it's a BIOS bug. */ - if (!boot_cpu_data.x86 == 0x11) + if (boot_cpu_data.x86 != 0x11) printk(KERN_ERR FW_WARN PFX "Invalid zero transition " "latency\n"); max_latency = 1; -- cgit v1.2.3 From 293afe44d75abce4252db76cbb303a7de4297ce1 Mon Sep 17 00:00:00 2001 From: John Villalovos Date: Fri, 25 Sep 2009 13:30:08 -0400 Subject: [CPUFREQ] acpi-cpufreq: blacklist Intel 0f68: Fix HT detection and put in notification message Removing the SMT/HT check, since the Errata doesn't mention Hyper-Threading. Adding in a printk, so that the user knows why acpi-cpufreq refuses to load. Also, once system is blacklisted, don't repeat checks to see if blacklisted. This also causes the message to only be printed once, rather than for each CPU. Signed-off-by: John L. Villalovos Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 7d5c3b0ea8d..8b581d3905c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -526,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) { - /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf + /* Intel Xeon Processor 7100 Series Specification Update + * http://www.intel.com/Assets/PDF/specupdate/314554.pdf * AL30: A Machine Check Exception (MCE) Occurring during an * Enhanced Intel SpeedStep Technology Ratio Change May Cause - * Both Processor Cores to Lock Up when HT is enabled*/ + * Both Processor Cores to Lock Up. 
*/ if (c->x86_vendor == X86_VENDOR_INTEL) { if ((c->x86 == 15) && (c->x86_model == 6) && - (c->x86_mask == 8) && smt_capable()) + (c->x86_mask == 8)) { + printk(KERN_INFO "acpi-cpufreq: Intel(R) " + "Xeon(R) 7100 Errata AL30, processors may " + "lock up on frequency changes: disabling " + "acpi-cpufreq.\n"); return -ENODEV; + } } return 0; } @@ -549,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; +#ifdef CONFIG_SMP + static int blacklisted; +#endif dprintk("acpi_cpufreq_cpu_init\n"); #ifdef CONFIG_SMP - result = acpi_cpufreq_blacklist(c); - if (result) - return result; + if (blacklisted) + return blacklisted; + blacklisted = acpi_cpufreq_blacklist(c); + if (blacklisted) + return blacklisted; #endif data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); -- cgit v1.2.3 From 8dca15e40889e5d5e9655b03ba79c26200f760ce Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 2 Nov 2009 23:35:30 -0800 Subject: [CPUFREQ] speedstep-ich: fix error caused by 394122ab144dae4b276d74644a2f11c44a60ac5c "[CPUFREQ] cpumask: avoid playing with cpus_allowed in speedstep-ich.c" changed the code to mistakenly pass the current cpu as the "processor" argument of speedstep_get_frequency(), whereas it should be the type of the processor. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14340 Based on a patch by Dave Mueller. Signed-off-by: Rusty Russell Acked-by: Dominik Brodowski Reported-by: Dave Mueller Cc: Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 6911e91fb4f..3ae5a7a3a50 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void) return 0; } -struct get_freq_data { - unsigned int speed; - unsigned int processor; -}; - -static void get_freq_data(void *_data) +static void get_freq_data(void *_speed) { - struct get_freq_data *data = _data; + unsigned int *speed = _speed; - data->speed = speedstep_get_frequency(data->processor); + *speed = speedstep_get_frequency(speedstep_processor); } static unsigned int speedstep_get(unsigned int cpu) { - struct get_freq_data data = { .processor = cpu }; + unsigned int speed; /* You're supposed to ensure CPU is online. */ - if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) + if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0) BUG(); - dprintk("detected %u kHz as current frequency\n", data.speed); - return data.speed; + dprintk("detected %u kHz as current frequency\n", speed); + return speed; } /** -- cgit v1.2.3 From d77b81974521c82fa6fda38dfff1b491dcc62a32 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 24 Nov 2009 16:53:00 +0100 Subject: [CPUFREQ] Enable ACPI PDC handshake for VIA/Centaur CPUs In commit 0de51088e6a82bc8413d3ca9e28bbca2788b5b53, we introduced the use of acpi-cpufreq on VIA/Centaur CPU's by removing a vendor check for VENDOR_INTEL. However, as it turns out, at least the Nano CPU's also need the PDC (processor driver capabilities) handshake in order to activate the methods required for acpi-cpufreq. 
Since arch_acpi_processor_init_pdc() contains another vendor check for Intel, the PDC is not initialized on VIA CPUs. The resulting behavior of a current mainline kernel on such systems is: acpi-cpufreq loads and it indicates CPU frequency changes. However, the CPU stays at a single frequency. This trivial patch ensures that init_intel_pdc() is called on Intel and VIA/Centaur CPUs alike. Signed-off-by: Harald Welte Signed-off-by: Dave Jones --- arch/x86/kernel/acpi/processor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d296f4a195c..d85d1b2432b 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) struct cpuinfo_x86 *c = &cpu_data(pr->id); pr->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL) + if (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR) init_intel_pdc(pr, c); return; -- cgit v1.2.3
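
The integer-promotion issue behind the get_desc_base() change above can be reproduced outside the kernel; the following is only a sketch, not kernel code: the fake_desc layout is a simplified stand-in for the real x86 desc_struct, and the values shown assume an LP64 target such as x86-64, mirroring the commit's observation that the fix merely turns the final sign-extension ("cltq") into a zero-extension ("mov %eax,%eax").

/*
 * Not part of any patch above -- a minimal userspace sketch of the
 * sign-extension pitfall fixed in get_desc_base(). The fake_desc
 * layout is a simplified assumption, not the real x86 desc_struct.
 */
#include <stdio.h>

struct fake_desc {
	unsigned base0 : 16;
	unsigned base1 : 8;
	unsigned base2 : 8;
};

/* Pre-patch expression: the OR is evaluated as (signed) int, so when
 * base2 >= 0x80 the result has bit 31 set on typical compilers and is
 * sign-extended when converted to a 64-bit unsigned long. */
static unsigned long get_base_old(const struct fake_desc *d)
{
	return d->base0 | (d->base1 << 16) | (d->base2 << 24);
}

/* Post-patch expression: casting to unsigned first makes the widening
 * a zero-extension, so the caller sees the intended 32-bit base. */
static unsigned long get_base_new(const struct fake_desc *d)
{
	return (unsigned)(d->base0 | (d->base1 << 16) | (d->base2 << 24));
}

int main(void)
{
	struct fake_desc d = { .base0 = 0x1234, .base1 = 0x56, .base2 = 0xff };

	printf("old: %#lx\n", get_base_old(&d));	/* 0xffffffffff561234 on LP64 */
	printf("new: %#lx\n", get_base_new(&d));	/* 0xff561234 */
	return 0;
}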