diff options
Diffstat (limited to 'arch/x86')
45 files changed, 370 insertions, 310 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73f7fe8fd4d..9c39095b33f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1802,6 +1802,17 @@ config DMAR and include PCI device scope covered by these DMA remapping devices. +config DMAR_DEFAULT_ON + def_bool n + prompt "Enable DMA Remapping Devices by default" + depends on DMAR + help + Selecting this option will enable a DMAR device at boot time if + one is found. If this option is not selected, DMAR support can + be enabled by passing intel_iommu=on to the kernel. It is + recommended you say N here while the DMAR code remains + experimental. + config DMAR_GFX_WA def_bool y prompt "Support for Graphics workaround" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845a..c98d52e8296 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -167,9 +167,9 @@ config MK7 config MK8 bool "Opteron/Athlon64/Hammer/K8" help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. config MCRUSOE bool "Crusoe" @@ -256,9 +256,11 @@ config MPSC config MCORE2 bool "Core 2/newer Xeon" help - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) - CPUs. You can distinguish newer from older Xeons by the CPU family - in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and + 53xx) CPUs. You can distinguish newer from older Xeons by the CPU + family in /proc/cpuinfo. Newer ones have 6 and older ones 15 + (not a typo) config GENERIC_CPU bool "Generic-x86-64" @@ -320,14 +322,14 @@ config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 help - Old PentiumPro multiprocessor systems had errata that could cause memory - operations to violate the x86 ordering standard in rare cases. Enabling this - option will attempt to work around some (but not all) occurances of - this problem, at the cost of much heavier spinlock and memory barrier - operations. + Old PentiumPro multiprocessor systems had errata that could cause + memory operations to violate the x86 ordering standard in rare cases. + Enabling this option will attempt to work around some (but not all) + occurances of this problem, at the cost of much heavier spinlock and + memory barrier operations. - If unsure, say n here. Even distro kernels should think twice before enabling - this: there are few systems, and an unlikely bug. + If unsure, say n here. Even distro kernels should think twice before + enabling this: there are few systems, and an unlikely bug. config X86_F00F_BUG def_bool y diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 10d6cc3fd05..e1983fa025d 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -174,28 +174,8 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PCI - select TRACING - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/tracers/mmiotrace.txt. - If you are not helping to develop drivers, say N. - -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. +config HAVE_MMIOTRACE_SUPPORT + def_bool y # # IO delay types: diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 37822206083..3c601f8224b 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -23,8 +23,6 @@ */ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) { - u16 gs; - /* changed the size calculations - should hopefully work better. lbt */ dump->magic = CMAGIC; dump->start_code = 0; @@ -57,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, gs); + savesegment(gs, dump->regs.gs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea408dcba51..7301e60dc4a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -93,6 +93,7 @@ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608a..886c9402ec4 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include <linux/types.h> #include <linux/ioctl.h> +/* Select x86 specific features in <linux/kvm.h> */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/math_emu.h b/arch/x86/include/asm/math_emu.h index 5a65b107ad5..031f6266f42 100644 --- a/arch/x86/include/asm/math_emu.h +++ b/arch/x86/include/asm/math_emu.h @@ -1,31 +1,18 @@ #ifndef _ASM_X86_MATH_EMU_H #define _ASM_X86_MATH_EMU_H +#include <asm/ptrace.h> +#include <asm/vm86.h> + /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved automatically by the 80386/80486. */ -struct info { +struct math_emu_info { long ___orig_eip; - long ___ebx; - long ___ecx; - long ___edx; - long ___esi; - long ___edi; - long ___ebp; - long ___eax; - long ___ds; - long ___es; - long ___fs; - long ___orig_eax; - long ___eip; - long ___cs; - long ___eflags; - long ___esp; - long ___ss; - long ___vm86_es; /* This and the following only in vm86 mode */ - long ___vm86_ds; - long ___vm86_fs; - long ___vm86_gs; + union { + struct pt_regs *regs; + struct kernel_vm86_regs *vm86; + }; }; #endif /* _ASM_X86_MATH_EMU_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 62d14ce3cd0..bd22f2a3713 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -60,6 +60,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); @@ -71,6 +72,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..776579119a0 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c09a1412758..e299287e8e3 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void) PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_cpu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } -} - +void arch_flush_lazy_cpu_mode(void); #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) @@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_mmu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } -} +void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, unsigned long phys, pgprot_t flags) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9..4f5af8447d5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -302,16 +302,30 @@ static inline pte_t pte_mkspecial(pte_t pte) extern pteval_t __supported_pte_mask; +/* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. + */ +static inline pgprotval_t massage_pgprot(pgprot_t pgprot) +{ + pgprotval_t protval = pgprot_val(pgprot); + + if (protval & _PAGE_PRESENT) + protval &= __supported_pte_mask; + + return protval; +} + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) | - pgprot_val(pgprot)) & __supported_pte_mask); + return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); } static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -323,7 +337,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * the newprot (if present): */ val &= _PAGE_CHG_MASK; - val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask; + val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK; return __pte(val); } @@ -339,7 +353,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) -#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#define canon_pgprot(p) __pgprot(massage_pgprot(p)) static inline int is_new_memtype_allowed(unsigned long flags, unsigned long new_flags) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..3bfd5235a9e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -353,7 +353,7 @@ struct i387_soft_struct { u8 no_update; u8 rm; u8 alimit; - struct info *info; + struct math_emu_info *info; u32 entry_eip; }; diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2ee0a3bceed..cf3bb053da0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs *, long); +dotraplinkage void do_device_not_available(struct pt_regs); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); @@ -77,7 +77,7 @@ extern int panic_on_unrecovered_nmi; extern int kstack_depth_to_print; void math_error(void __user *); -asmlinkage void math_emulate(long); +void math_emulate(struct math_emu_info *); #ifdef CONFIG_X86_32 unsigned long patch_espfix_desc(unsigned long, unsigned long); #else diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 7ef617ef1df..4bd990ee43d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -137,7 +137,7 @@ static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) pte_t pte; pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | - (pgprot_val(pgprot) & __supported_pte_mask); + massage_pgprot(pgprot); return pte; } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f43..7678f10c456 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index fb039cd345d..6428aa17b40 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - rc = powernow_k8_cpu_init_acpi(data); - if (rc) { + if (powernow_k8_cpu_init_acpi(data)) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - goto err_out; + kfree(data); + return -ENODEV; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - goto err_out; + kfree(data); + return -ENODEV; } rc = find_psb_table(data); if (rc) { - goto err_out; + kfree(data); + return -ENODEV; } /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 430e5c38a54..24ff26a38ad 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } + if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1b43086b097..231bdd3c5b1 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -488,20 +488,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) * ignore such a protection. */ asm volatile( - "1: " _ASM_MOV " (%[parent_old]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" + "1: " _ASM_MOV " (%[parent]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" " movl $0, %[faulted]\n" + "3:\n" ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" + "4: movl $1, %[faulted]\n" + " jmp 3b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : [old] "=r" (old), [faulted] "=r" (faulted) + : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0b8ad..a00545fe5cd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); @@ -897,7 +899,7 @@ static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; +static u32 hpet_t1_cmp; static unsigned long hpet_default_delta; static unsigned long hpet_pie_delta; static unsigned long hpet_pie_limit; @@ -905,6 +907,14 @@ static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; /* + * Check that the hpet counter c1 is ahead of the c2 + */ +static inline int hpet_cnt_ahead(u32 c1, u32 c2) +{ + return (s32)(c2 - c1) < 0; +} + +/* * Registers a IRQ handler. */ int hpet_register_irq_handler(rtc_irq_handler handler) @@ -1075,7 +1085,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index dbd6c1d1b63..b42ca694dc6 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev) flags = claim_dma_lock(); - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); + dma_outb(0, DMA1_RESET_REG); + dma_outb(0, DMA2_RESET_REG); - for (i = 0;i < 8;i++) { + for (i = 0; i < 8; i++) { set_dma_addr(i, 0x000000); /* DMA count is a bit weird so this is not 0 */ set_dma_count(i, 1); @@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class i8237_sysdev_class = { - .name = "i8237", - .suspend = i8237A_suspend, - .resume = i8237A_resume, + .name = "i8237", + .suspend = i8237A_suspend, + .resume = i8237A_resume, }; static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, + .id = 0, + .cls = &i8237_sysdev_class, }; static int __init i8237A_init_sysfs(void) @@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void) error = sysdev_register(&device_i8237A); return error; } - device_initcall(i8237A_init_sysfs); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b0c480c383..bc7ac4da90d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } /* -------------------------------------------------------------------------- diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 7a13fac63a1..4006c522adc 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = 0xc2; + olpc_platform_info.boardrev = olpc_board(0xc2); } #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..c6520a4e85d 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e3086..6d12f7e37f8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) @@ -194,6 +197,9 @@ static void mwait_idle(void) struct power_trace it; if (!need_resched()) { trace_power_start(&it, POWER_CSTATE, 1); + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4..85b4cb5c198 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -40,6 +40,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> +#include <linux/dmi.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; + const char *board; printk("\n"); print_modules(); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb..5a4c23d8989 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf6..c461f6d6907 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS may corrupt low RAM, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working around it.\n", d->ident); e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284..a9e7548e179 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -626,8 +632,10 @@ clear_dr7: #ifdef CONFIG_X86_32 debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); + conditional_cli(regs); return; #endif @@ -896,7 +904,7 @@ asmlinkage void math_state_restore(void) EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); @@ -906,16 +914,19 @@ asmlinkage void math_emulate(long arg) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) +dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); + struct math_emu_info info = { }; + + conditional_sti(®s); + + info.regs = ®s; + math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(regs); + conditional_sti(®s); } #else math_state_restore(); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2dd..bef58b4982d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -321,6 +321,16 @@ static void vmi_release_pmd(unsigned long pfn) } /* + * We use the pgd_free hook for releasing the pgd page: + */ +static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; + + vmi_ops.release_page(pfn, VMI_PAGE_L2); +} + +/* * Helper macros for MMU update flags. We can defer updates until a flush * or page invalidation only if the update is to the current address space * (otherwise, there is no flush). We must check against init_mm, since @@ -762,6 +772,7 @@ static inline int __init activate_vmi(void) if (vmi_ops.release_page) { pv_mmu_ops.release_pte = vmi_release_pte; pv_mmu_ops.release_pmd = vmi_release_pmd; + pv_mmu_ops.pgd_free = vmi_pgd_free; } /* Set linear is needed in all cases */ diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index e665d1c623c..72bd275a9b5 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) hrtimer_add_expires_ns(&pt->timer, pt->period); pt->scheduled = hrtimer_get_expires_ns(&pt->timer); if (pt->period) - ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); + ps->channels[0].count_load_time = ktime_get(); return (pt->period == 0 ? 0 : 1); } diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index c019b8edcdb..cf17ed52f6f 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - kvm_apic_timer_intr_post(vcpu, vec); - /* TODO: PIT, RTC etc. */ -} -EXPORT_SYMBOL_GPL(kvm_timer_intr_post); - void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2bf32a03cee..82579ee538d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afac68c0815..f0b67f2cdd6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -35,6 +35,12 @@ #include "kvm_cache_regs.h" #include "irq.h" +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + #define PRId64 "d" #define PRIx64 "llx" #define PRIu64 "u" @@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic) { - u64 counter_passed; - ktime_t passed, now; + ktime_t remaining; + s64 ns; u32 tmcct; ASSERT(apic != NULL); - now = apic->timer.dev.base->get_time(); - tmcct = apic_get_reg(apic, APIC_TMICT); - /* if initial count is 0, current count should also be 0 */ - if (tmcct == 0) + if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - if (unlikely(ktime_to_ns(now) <= - ktime_to_ns(apic->timer.last_update))) { - /* Wrap around */ - passed = ktime_add(( { - (ktime_t) { - .tv64 = KTIME_MAX - - (apic->timer.last_update).tv64}; } - ), now); - apic_debug("time elapsed\n"); - } else - passed = ktime_sub(now, apic->timer.last_update); - - counter_passed = div64_u64(ktime_to_ns(passed), - (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); - - if (counter_passed > tmcct) { - if (unlikely(!apic_lvtt_period(apic))) { - /* one-shot timers stick at 0 until reset */ - tmcct = 0; - } else { - /* - * periodic timers reset to APIC_TMICT when they - * hit 0. The while loop simulates this happening N - * times. (counter_passed %= tmcct) would also work, - * but might be slower or not work on 32-bit?? - */ - while (counter_passed > tmcct) - counter_passed -= tmcct; - tmcct -= counter_passed; - } - } else { - tmcct -= counter_passed; - } + remaining = hrtimer_expires_remaining(&apic->timer.dev); + if (ktime_to_ns(remaining) < 0) + remaining = ktime_set(0, 0); + + ns = mod_64(ktime_to_ns(remaining), apic->timer.period); + tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); return tmcct; } @@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->timer.dev.base->get_time(); - apic->timer.last_update = now; - apic->timer.period = apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->timer.divide_count; atomic_set(&apic->timer.pending, 0); @@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) - apic->timer.last_update = ktime_add_ns( - apic->timer.last_update, - apic->timer.period); -} - int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 81858881287..45ab6ee7120 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,7 +12,6 @@ struct kvm_lapic { atomic_t pending; s64 period; /* unit: ns */ u32 divide_count; - ktime_t last_update; struct hrtimer dev; } timer; struct kvm_vcpu *vcpu; @@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83f11c7474a..2d4477c7147 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (largepage) spte |= PT_PAGE_SIZE_MASK; if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); + if (!kvm_is_mmio_pfn(pfn)) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + mt_mask |= VMX_EPT_IGMT_BIT; + } else + mt_mask = MTRR_TYPE_UNCACHABLE << + kvm_x86_ops->get_mt_mask_shift(); spte |= mt_mask; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1452851ae25..a9e769e4e25 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* Okay, we can deliver the interrupt: grab it and update PIC state. */ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); - kvm_timer_intr_post(vcpu, intr_vector); out: update_cr8_intercept(vcpu); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6259d746764..7611af57682 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: + vmx_load_host_state(to_vmx(vcpu)); msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; @@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) } if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); if (kvm_cpu_has_interrupt(vcpu)) enable_irq_window(vcpu); } @@ -3687,8 +3687,7 @@ static int __init vmx_init(void) if (vm_need_ept()) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_IGMT_BIT); + VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546a240..758b7a155ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: - case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; + case KVM_CAP_CLOCKSOURCE: + r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); + break; default: r = 0; break; @@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm) } -void kvm_arch_destroy_vm(struct kvm *kvm) +void kvm_arch_sync_events(struct kvm *kvm) { kvm_free_all_assigned_devices(kvm); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index c7b06feb139..5d87f586f8d 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -131,7 +131,7 @@ u_char emulating = 0; static int valid_prefix(u_char *Byte, u_char __user ** fpu_eip, overrides * override); -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { u_char FPU_modrm, byte1; unsigned short code; @@ -161,7 +161,7 @@ asmlinkage void math_emulate(long arg) RE_ENTRANT_CHECK_ON; #endif /* RE_ENTRANT_CHECKING */ - SETUP_DATA_AREA(arg); + FPU_info = info; FPU_ORIG_EIP = FPU_EIP; @@ -659,7 +659,7 @@ static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, } } -void math_abort(struct info *info, unsigned int signal) +void math_abort(struct math_emu_info *info, unsigned int signal) { FPU_EIP = FPU_ORIG_EIP; current->thread.trap_no = 16; diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index aa49b6a0d85..9779df436b7 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -51,8 +51,8 @@ extern void ffreep(void); extern void fst_i_(void); extern void fstp_i(void); /* fpu_entry.c */ -asmlinkage extern void math_emulate(long arg); -extern void math_abort(struct info *info, unsigned int signal); +extern void math_emulate(struct math_emu_info *info); +extern void math_abort(struct math_emu_info *info, unsigned int signal); /* fpu_etc.c */ extern void FPU_etc(void); /* fpu_tags.c */ diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 13488fa153e..50fa0ec2c8a 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -16,10 +16,6 @@ #include <linux/kernel.h> #include <linux/mm.h> -/* This sets the pointer FPU_info to point to the argument part - of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg - /* s is always from a cpu register, and the cpu does bounds checking * during register load --> no further bounds checks needed */ #define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) @@ -38,12 +34,12 @@ #define I387 (current->thread.xstate) #define FPU_info (I387->soft.info) -#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) -#define FPU_EAX (FPU_info->___eax) -#define FPU_EFLAGS (FPU_info->___eflags) -#define FPU_EIP (FPU_info->___eip) +#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->regs->ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->regs->ds)) +#define FPU_EAX (FPU_info->regs->ax) +#define FPU_EFLAGS (FPU_info->regs->flags) +#define FPU_EIP (FPU_info->regs->ip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) #define FPU_lookahead (I387->soft.lookahead) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index d701e2b39e4..420b3b6e391 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -29,46 +29,43 @@ #define FPU_WRITE_BIT 0x10 static int reg_offset[] = { - offsetof(struct info, ___eax), - offsetof(struct info, ___ecx), - offsetof(struct info, ___edx), - offsetof(struct info, ___ebx), - offsetof(struct info, ___esp), - offsetof(struct info, ___ebp), - offsetof(struct info, ___esi), - offsetof(struct info, ___edi) + offsetof(struct pt_regs, ax), + offsetof(struct pt_regs, cx), + offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, bx), + offsetof(struct pt_regs, sp), + offsetof(struct pt_regs, bp), + offsetof(struct pt_regs, si), + offsetof(struct pt_regs, di) }; -#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) +#define REG_(x) (*(long *)(reg_offset[(x)] + (u_char *)FPU_info->regs)) static int reg_offset_vm86[] = { - offsetof(struct info, ___cs), - offsetof(struct info, ___vm86_ds), - offsetof(struct info, ___vm86_es), - offsetof(struct info, ___vm86_fs), - offsetof(struct info, ___vm86_gs), - offsetof(struct info, ___ss), - offsetof(struct info, ___vm86_ds) + offsetof(struct pt_regs, cs), + offsetof(struct kernel_vm86_regs, ds), + offsetof(struct kernel_vm86_regs, es), + offsetof(struct kernel_vm86_regs, fs), + offsetof(struct kernel_vm86_regs, gs), + offsetof(struct pt_regs, ss), + offsetof(struct kernel_vm86_regs, ds) }; #define VM86_REG_(x) (*(unsigned short *) \ - (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) - -/* This dummy, gs is not saved on the stack. */ -#define ___GS ___ds + (reg_offset_vm86[((unsigned)x)] + (u_char *)FPU_info->regs)) static int reg_offset_pm[] = { - offsetof(struct info, ___cs), - offsetof(struct info, ___ds), - offsetof(struct info, ___es), - offsetof(struct info, ___fs), - offsetof(struct info, ___GS), - offsetof(struct info, ___ss), - offsetof(struct info, ___ds) + offsetof(struct pt_regs, cs), + offsetof(struct pt_regs, ds), + offsetof(struct pt_regs, es), + offsetof(struct pt_regs, fs), + offsetof(struct pt_regs, ds), /* dummy, not saved on stack */ + offsetof(struct pt_regs, ss), + offsetof(struct pt_regs, ds) }; #define PM_REG_(x) (*(unsigned short *) \ - (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) + (reg_offset_pm[((unsigned)x)] + (u_char *)FPU_info->regs)) /* Decode the SIB byte. This function assumes mod != 0 */ static int sib(int mod, unsigned long *fpu_eip) @@ -349,34 +346,34 @@ void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, } switch (rm) { case 0: - address += FPU_info->___ebx + FPU_info->___esi; + address += FPU_info->regs->bx + FPU_info->regs->si; break; case 1: - address += FPU_info->___ebx + FPU_info->___edi; + address += FPU_info->regs->bx + FPU_info->regs->di; break; case 2: - address += FPU_info->___ebp + FPU_info->___esi; + address += FPU_info->regs->bp + FPU_info->regs->si; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 3: - address += FPU_info->___ebp + FPU_info->___edi; + address += FPU_info->regs->bp + FPU_info->regs->di; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 4: - address += FPU_info->___esi; + address += FPU_info->regs->si; break; case 5: - address += FPU_info->___edi; + address += FPU_info->regs->di; break; case 6: - address += FPU_info->___ebp; + address += FPU_info->regs->bp; if (addr_modes.override.segment == PREFIX_DEFAULT) addr_modes.override.segment = PREFIX_SS_; break; case 7: - address += FPU_info->___ebx; + address += FPU_info->regs->bx; break; } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index af750ab973b..f45d5e29a72 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr) return 0; } -int pagerange_is_ram(unsigned long start, unsigned long end) -{ - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; - - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - if (page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; - } - - return ram_page; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 84ba74820ad..8ca0d8566fc 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; @@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); + /* + * If we've been called with lazy mmu updates enabled, then + * make sure that everything gets flushed out before we + * return. + */ + arch_flush_lazy_mmu_mode(); + out: return ret; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7b61036427d..aebbf67a79d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) static struct memtype *cached_entry; static u64 cached_start; +static int pat_pagerange_is_ram(unsigned long start, unsigned long end) +{ + int ram_page = 0, not_rampage = 0; + unsigned long page_nr; + + for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); + ++page_nr) { + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && + page_is_ram(page_nr)) + ram_page = 1; + else + not_rampage = 1; + + if (ram_page == not_rampage) + return -1; + } + + return ram_page; +} + /* * For RAM pages, mark the pages as non WB memory type using * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or @@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) *new_type = actual_type; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram != 0) { - /* - * For mapping RAM pages, drivers need to call - * set_memory_[uc|wc|wb] directly, for reserve and free, before - * setting up the PTE. - */ - WARN_ON_ONCE(1); - return 0; - } + /* + * reserve_pfn_range() doesn't support RAM pages. + */ + if (is_ram != 0) + return -EINVAL; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) @@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) { int is_ram; - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) free_memtype(paddr, paddr + size); } |