From 79f3b3cb7a2586b319a43a7f29924c6c555e4357 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Thu, 8 Jan 2009 10:04:30 -0500 Subject: x86, mtrr: fix types used in userspace exported header Commit 932d27a7913fc6b3c64c6e6082628b0a1561dec9 exported some mtrr structures without using the exportable __uX types, causing userspace build failures. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index cb988aab716..14080d22edb 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -58,15 +58,15 @@ struct mtrr_gentry { #endif /* !__i386__ */ struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; + __u32 base_lo; + __u32 base_hi; + __u32 mask_lo; + __u32 mask_hi; }; /* In the Intel processor's MTRR interface, the MTRR type is always held in an 8 bit field: */ -typedef u8 mtrr_type; +typedef __u8 mtrr_type; #define MTRR_NUM_FIXED_RANGES 88 #define MTRR_MAX_VAR_RANGES 256 -- cgit v1.2.3 From f313e12308f7c5ea645f18e759d104d088b18615 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 9 Jan 2009 12:17:43 -0800 Subject: x86: avoid theoretical vmalloc fault loop Ajith Kumar noticed: I was going through the vmalloc fault handling for x86_64 and am unclear about the following lines in the vmalloc_fault() function. pgd = pgd_offset(current->mm ?: &init_mm, address); pgd_ref = pgd_offset_k(address); Here the intention is to get the pgd corresponding to the current process and sync it up with the pgd in init_mm(obtained from pgd_offset_k). However, for kernel threads current->mm is NULL and hence pgd = pgd_offset(init_mm, address) = pgd_ref which means the fault handler returns without setting the pgd entry in the MM structure in the context of which the kernel thread has faulted. This could lead to never-ending faults and busy looping of kernel threads like pdflush. So, shouldn't the pgd = pgd_offset(current->mm ?: &init_mm, address); be pgd = pgd_offset(current->active_mm ?: &init_mm, address); We can use active_mm unconditionally because it should be always set. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9e268b6b204..90dfae511a4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -534,7 +534,7 @@ static int vmalloc_fault(unsigned long address) happen within a race in page table update. In the later case just flush. */ - pgd = pgd_offset(current->mm ?: &init_mm, address); + pgd = pgd_offset(current->active_mm, address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; -- cgit v1.2.3 From 4884d8e6a05026ec906355436cea9dc1acb1d09e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 11 Jan 2009 18:38:55 +0530 Subject: x86: fix mpparse.c build error on latest git Fix this by reintroducing asm/smp.h include in mpparse.c - later on I will fix this by removing non-smp data from smp.h. 
Reported-by: Petr Titera Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c0601c2848a..a649a4ccad4 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 2bc1379712e74c5b99adaa6db433c14d8841ab4f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 11 Jan 2009 20:34:47 +0530 Subject: x86: fix apic.c build error on latest git Fix this by reintroducing asm/smp.h include in apic.c - later on I will fix this by removing non-smp data from smp.h Also fix the __inquire_remote_apic() prototype/inline. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_wakecpu.h | 6 ++++++ arch/x86/kernel/apic.c | 1 + 2 files changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index ceb01366014..89897a6a65b 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -24,7 +24,13 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } +#ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ static inline void inquire_remote_apic(int apicid) { diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 566a08466b1..0f830e4f567 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 50c668d678fd01284799a6e4f1b91829d83cb9ed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 10:49:53 +0100 Subject: Revert "cpumask: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write" This reverts commit 7503bfbae89eba07b46441a5d1594647f6b8ab7d. Dieter Ries reported bootup soft-hangs and bisected it back to this commit, and reverting this commit gave him a working system. The commit introduces work_on_cpu() use into the cpufreq code, but that is subtly problematic from a lock hierarchy POV: the hotplug-cpu lock is an highlevel lock that is taken before lowlevel locks, and in this codepath we are called with the policy lock taken. Dieter did not have lockdep enabled so we dont have a nice stack trace proof for this, but using work_on_cpu() in such a lowlevel place certainly looks wrong, so we revert the patch. work_on_cpu() needs to be reworked to be more generally usable. 
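For reference, the pattern this revert restores is the affinity-pinning approach sketched below (illustrative only, assuming process context and an online target CPU; read_on_cpu() and read_fn are hypothetical stand-ins for the real drv_read()/do_drv_read()). The calling task is temporarily migrated to the target CPU with set_cpus_allowed_ptr(), the register access runs there, and the saved mask is restored, avoiding the work_on_cpu() path that the changelog above flags as problematic under the policy lock:

/*
 * Illustrative sketch, not part of the patch: run a per-CPU register
 * read on 'cpu' by pinning the current task to that CPU, mirroring
 * the drv_read()/drv_write() hunks in the diff below.
 */
static u32 read_on_cpu(int cpu, u32 (*read_fn)(void))
{
        cpumask_t saved_mask = current->cpus_allowed;
        u32 val;

        set_cpus_allowed_ptr(current, cpumask_of(cpu));
        val = read_fn();                /* now executing on 'cpu' */
        set_cpus_allowed_ptr(current, &saved_mask);

        return val;
}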
Reported-by: Dieter Ries Tested-by: Dieter Ries Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 06fcd8f9323..6f11e029e8c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,9 +150,8 @@ struct drv_cmd { u32 val; }; -static long do_drv_read(void *_cmd) +static void do_drv_read(struct drv_cmd *cmd) { - struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -167,12 +166,10 @@ static long do_drv_read(void *_cmd) default: break; } - return 0; } -static long do_drv_write(void *_cmd) +static void do_drv_write(struct drv_cmd *cmd) { - struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -189,23 +186,30 @@ static long do_drv_write(void *_cmd) default: break; } - return 0; } static void drv_read(struct drv_cmd *cmd) { + cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); + set_cpus_allowed_ptr(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed_ptr(current, &saved_mask); } static void drv_write(struct drv_cmd *cmd) { + cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - work_on_cpu(i, do_drv_write, cmd); + set_cpus_allowed_ptr(current, cpumask_of(i)); + do_drv_write(cmd); } + + set_cpus_allowed_ptr(current, &saved_mask); + return; } static u32 get_cur_val(const struct cpumask *mask) @@ -231,15 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return 0; - cpumask_copy(cmd.mask, mask); drv_read(&cmd); - free_cpumask_var(cmd.mask); - dprintk("get_cur_val = %u\n", cmd.val); return cmd.val; -- cgit v1.2.3 From e8cea892dff8e3ebed42954c46730309b617196f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 19:36:59 +0100 Subject: Revert "i386: add TRACE_IRQS_OFF for the nmi" This reverts commit e0c7317557c8fc8eacf611e30c2a80f4e24e47a3. This patch was wrong, as lockdep (and thus the irq state tracer) aren't nmi safe. People are already seeing lockdep warnings due to this. Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index d6f0490a739..46469029e9d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1203,7 +1203,6 @@ nmi_stack_correct: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - TRACE_IRQS_OFF xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi @@ -1244,7 +1243,6 @@ nmi_espfix_stack: pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - TRACE_IRQS_OFF FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi -- cgit v1.2.3 From afc7d20c8429f32f19d47367fdc36eeed2334ec3 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:10 -0800 Subject: x86 PAT: consolidate old memtype new memtype check into a function Impact: cleanup Move the new memtype old memtype allowed check to header so that is can be shared by other users. Subsequent patch uses this in pat.c in remap_pfn_range() code path. No functionality change in this patch. 
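The intended call pattern is roughly the sketch below; it paraphrases the existing caller rather than adding behaviour, and the wrapper name check_and_free_on_conflict() is hypothetical (the real code is the pci_mmap_page_range() hunk in the diff that follows). The memtype returned by reserve_memtype() is compared with the requested one, and the reservation is backed out when the combination is not allowed:

/*
 * Sketch of the caller side: 'req_flags' is the requested memtype and
 * 'new_flags' is what reserve_memtype() actually returned for
 * [addr, addr + len).  Mirrors pci_mmap_page_range() below.
 */
static int check_and_free_on_conflict(u64 addr, unsigned long len,
                                      unsigned long req_flags,
                                      unsigned long new_flags)
{
        if (req_flags == new_flags)
                return 0;

        if (!is_new_memtype_allowed(req_flags, new_flags)) {
                free_memtype(addr, addr + len);
                return -EINVAL;
        }

        /* otherwise proceed with the compatible type in new_flags */
        return 0;
}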
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 19 +++++++++++++++++++ arch/x86/pci/i386.c | 12 +----------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 83e69f4a37f..06bbcbd66e9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -341,6 +341,25 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +static inline int is_new_memtype_allowed(unsigned long flags, + unsigned long new_flags) +{ + /* + * Certain new memtypes are not allowed with certain + * requested memtype: + * - request is uncached, return cannot be write-back + * - request is write-combine, return cannot be write-back + */ + if ((flags == _PAGE_CACHE_UC_MINUS && + new_flags == _PAGE_CACHE_WB) || + (flags == _PAGE_CACHE_WC && + new_flags == _PAGE_CACHE_WB)) { + return 0; + } + + return 1; +} + #ifndef __ASSEMBLY__ /* Indicate that x86 has its own track and untrack pfn vma functions */ #define __HAVE_PFNMAP_TRACKING diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index f884740da31..5ead808dd70 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -314,17 +314,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return retval; if (flags != new_flags) { - /* - * Do not fallback to certain memory types with certain - * requested type: - * - request is uncached, return cannot be write-back - * - request is uncached, return cannot be write-combine - * - request is write-combine, return cannot be write-back - */ - if ((flags == _PAGE_CACHE_UC_MINUS && - (new_flags == _PAGE_CACHE_WB)) || - (flags == _PAGE_CACHE_WC && - new_flags == _PAGE_CACHE_WB)) { + if (!is_new_memtype_allowed(flags, new_flags)) { free_memtype(addr, addr+len); return -EINVAL; } -- cgit v1.2.3 From e4b866ed197cef9989348e0479fed8d864ea465b Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:11 -0800 Subject: x86 PAT: change track_pfn_vma_new to take pgprot_t pointer param Impact: cleanup Change the protection parameter for track_pfn_vma_new() into a pgprot_t pointer. Subsequent patch changes the x86 PAT handling to return a compatible memtype in pgprot_t, if what was requested cannot be allowed due to conflicts. No fuctionality change in this patch. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 85cbd3cd372..f88ac80530c 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -741,7 +741,7 @@ cleanup_ret: * Note that this function can be called with caller trying to map only a * subrange/page inside the vma. 
*/ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, +int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { int retval = 0; @@ -758,14 +758,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot); + return reserve_pfn_range(paddr, vma_size, *prot); } /* reserve page by page using pfn and size */ base_paddr = (resource_size_t)pfn << PAGE_SHIFT; for (i = 0; i < size; i += PAGE_SIZE) { paddr = base_paddr + i; - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, *prot); if (retval) goto cleanup_ret; } -- cgit v1.2.3 From cdecff6864a1cd352a41d44a65e7451b8ef5cee2 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:12 -0800 Subject: x86 PAT: return compatible mapping to remap_pfn_range callers Impact: avoid warning message, potentially solve 3D performance regression Change x86 PAT code to return compatible memtype if the exact memtype that was requested in remap_pfn_rage and friends is not available due to some conflict. This is done by returning the compatible type in pgprot parameter of track_pfn_vma_new(), and the caller uses that memtype for page table. Note that track_pfn_vma_copy() which is basically called during fork gets the prot from existing page table and should not have any conflict. Hence we use strict memtype check there and do not allow compatible memtypes. This patch fixes the bug reported here: http://marc.info/?l=linux-kernel&m=123108883716357&w=2 Specifically the error message: X:5010 map pfn expected mapping type write-back for d0000000-d0101000, got write-combining Should go away. Reported-and-bisected-by: Kevin Winchester Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f88ac80530c..8b08fb95527 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -601,12 +601,13 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) * Reserved non RAM regions only and after successful reserve_memtype, * this func also keeps identity mapping (if any) in sync with this new prot. 
*/ -static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) +static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, + int strict_prot) { int is_ram = 0; int id_sz, ret; unsigned long flags; - unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); is_ram = pagerange_is_ram(paddr, paddr + size); @@ -625,15 +626,24 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) return ret; if (flags != want_flags) { - free_memtype(paddr, paddr + size); - printk(KERN_ERR - "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, - cattr_name(want_flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size), - cattr_name(flags)); - return -EINVAL; + if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) { + free_memtype(paddr, paddr + size); + printk(KERN_ERR "%s:%d map pfn expected mapping type %s" + " for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + (unsigned long long)paddr, + (unsigned long long)(paddr + size), + cattr_name(flags)); + return -EINVAL; + } + /* + * We allow returning different type than the one requested in + * non strict case. + */ + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + flags); } /* Need to keep identity mapping in sync */ @@ -689,6 +699,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; + pgprot_t pgprot; if (!pat_enabled) return 0; @@ -702,7 +713,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) WARN_ON_ONCE(1); return -EINVAL; } - return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); + pgprot = __pgprot(prot); + return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } /* reserve entire vma page by page, using pfn and prot from pte */ @@ -710,7 +722,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); + pgprot = __pgprot(prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1); if (retval) goto cleanup_ret; } @@ -758,14 +771,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, *prot); + return reserve_pfn_range(paddr, vma_size, prot, 0); } /* reserve page by page using pfn and size */ base_paddr = (resource_size_t)pfn << PAGE_SHIFT; for (i = 0; i < size; i += PAGE_SIZE) { paddr = base_paddr + i; - retval = reserve_pfn_range(paddr, PAGE_SIZE, *prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0); if (retval) goto cleanup_ret; } -- cgit v1.2.3 From 58dab916dfb57328d50deb0aa9b3fc92efa248ff Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:14 -0800 Subject: x86 PAT: remove CPA WARN_ON for zero pte Impact: reduce scope of debug check - avoid warnings The logic to find whether identity map exists or not using high_memory or max_low_pfn_mapped/max_pfn_mapped are not complete as the memory withing the range may not be mapped if there is a unusable hole in e820. 
Specifically, on my test system I started seeing these warnings with tools like hwinfo, acpidump trying to map ACPI region. [ 27.400018] ------------[ cut here ]------------ [ 27.400344] WARNING: at /home/venkip/src/linus/linux-2.6/arch/x86/mm/pageattr.c:560 __change_page_attr_set_clr+0xf3/0x8b8() [ 27.400821] Hardware name: X7DB8 [ 27.401070] CPA: called for zero pte. vaddr = ffff8800cff6a000 cpa->vaddr = ffff8800cff6a000 [ 27.401569] Modules linked in: [ 27.401882] Pid: 4913, comm: dmidecode Not tainted 2.6.28-05716-gfe0bdec #586 [ 27.402141] Call Trace: [ 27.402488] [] warn_slowpath+0xd3/0x10f [ 27.402749] [] ? find_get_page+0xb3/0xc9 [ 27.403028] [] ? find_get_page+0x0/0xc9 [ 27.403333] [] __change_page_attr_set_clr+0xf3/0x8b8 [ 27.403628] [] ? __purge_vmap_area_lazy+0x192/0x1a1 [ 27.403883] [] ? __purge_vmap_area_lazy+0x4b/0x1a1 [ 27.404172] [] ? vm_unmap_aliases+0x1ab/0x1bb [ 27.404512] [] ? vm_unmap_aliases+0x48/0x1bb [ 27.404766] [] change_page_attr_set_clr+0x13e/0x2e6 [ 27.405026] [] ? _spin_unlock+0x26/0x2a [ 27.405292] [] ? reserve_memtype+0x19b/0x4e3 [ 27.405590] [] _set_memory_wb+0x22/0x24 [ 27.405844] [] ioremap_change_attr+0x26/0x28 [ 27.406097] [] reserve_pfn_range+0x1a3/0x235 [ 27.406427] [] track_pfn_vma_new+0x49/0xb3 [ 27.406686] [] remap_pfn_range+0x94/0x32c [ 27.406940] [] ? phys_mem_access_prot_allowed+0xb5/0x1a8 [ 27.407209] [] mmap_mem+0x75/0x9d [ 27.407523] [] mmap_region+0x2cf/0x53e [ 27.407776] [] do_mmap_pgoff+0x2a9/0x30d [ 27.408034] [] sys_mmap+0x92/0xce [ 27.408339] [] system_call_fastpath+0x16/0x1b [ 27.408614] ---[ end trace 4b16ad70c09a602d ]--- [ 27.408871] dmidecode:4913 reserve_pfn_range ioremap_change_attr failed write-back for cff6a000-cff6b000 This is wih track_pfn_vma_new trying to keep identity map in sync. The address cff6a000 is the ACPI region according to e820. [ 0.000000] BIOS-provided physical RAM map: [ 0.000000] BIOS-e820: 0000000000000000 - 000000000009c000 (usable) [ 0.000000] BIOS-e820: 000000000009c000 - 00000000000a0000 (reserved) [ 0.000000] BIOS-e820: 00000000000cc000 - 00000000000d0000 (reserved) [ 0.000000] BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved) [ 0.000000] BIOS-e820: 0000000000100000 - 00000000cff60000 (usable) [ 0.000000] BIOS-e820: 00000000cff60000 - 00000000cff69000 (ACPI data) [ 0.000000] BIOS-e820: 00000000cff69000 - 00000000cff80000 (ACPI NVS) [ 0.000000] BIOS-e820: 00000000cff80000 - 00000000d0000000 (reserved) [ 0.000000] BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved) [ 0.000000] BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved) [ 0.000000] BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) [ 0.000000] BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved) [ 0.000000] BIOS-e820: 0000000100000000 - 0000000230000000 (usable) And is not mapped as per init_memory_mapping. [ 0.000000] init_memory_mapping: 0000000000000000-00000000cff60000 [ 0.000000] init_memory_mapping: 0000000100000000-0000000230000000 We can add logic to check for this. But, there can also be other holes in identity map when we have 1GB of aligned reserved space in e820. This patch handles it by removing the WARN_ON and returning a specific error value (EFAULT) to indicate that the address does not have any identity mapping. The code that tries to keep identity map in sync can ignore this error, with other callers of cpa still getting error here. 
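The key error-handling step, isolated here for clarity (the full helper, kernel_map_sync_memtype(), is in the diff below): an -EFAULT return from the cpa path now means the address simply has no identity mapping, which the sync code treats as success, while any other error is still returned to the caller.

        ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags);
        /* -EFAULT: no identity mapping at this address, nothing to sync */
        if (ret == -EFAULT)
                ret = 0;

        return ret;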
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 ++++++---- arch/x86/mm/pat.c | 45 ++++++++++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e89d24815f2..4cf30dee816 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -555,10 +555,12 @@ repeat: if (!pte_val(old_pte)) { if (!primary) return 0; - WARN(1, KERN_WARNING "CPA: called for zero pte. " - "vaddr = %lx cpa->vaddr = %lx\n", address, - *cpa->vaddr); - return -EINVAL; + + /* + * Special error value returned, indicating that the mapping + * did not exist at this address. + */ + return -EFAULT; } if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8b08fb95527..160c42d3eb8 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -505,6 +505,35 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } #endif /* CONFIG_STRICT_DEVMEM */ +/* + * Change the memory type for the physial address range in kernel identity + * mapping space if that range is a part of identity map. + */ +static int kernel_map_sync_memtype(u64 base, unsigned long size, + unsigned long flags) +{ + unsigned long id_sz; + int ret; + + if (!pat_enabled || base >= __pa(high_memory)) + return 0; + + id_sz = (__pa(high_memory) < base + size) ? + __pa(high_memory) - base : + size; + + ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags); + /* + * -EFAULT return means that the addr was not valid and did not have + * any identity mapping. That case is a success for + * kernel_map_sync_memtype. + */ + if (ret == -EFAULT) + ret = 0; + + return ret; +} + int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { @@ -555,9 +584,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (retval < 0) return 0; - if (((pfn < max_low_pfn_mapped) || - (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && - ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + if (kernel_map_sync_memtype(offset, size, flags)) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -605,7 +632,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, int strict_prot) { int is_ram = 0; - int id_sz, ret; + int ret; unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); @@ -646,15 +673,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, flags); } - /* Need to keep identity mapping in sync */ - if (paddr >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < paddr + size) ? - __pa(high_memory) - paddr : - size; - - if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { + if (kernel_map_sync_memtype(paddr, size, flags)) { free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d reserve_pfn_range ioremap_change_attr failed %s " -- cgit v1.2.3 From e55380edf68796d75bf41391a781c68ee678587d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:55 +0100 Subject: [CVE-2009-0029] Rename old_readdir to sys_old_readdir This way it matches the generic system call name convention. 
Signed-off-by: Heiko Carstens --- arch/x86/kernel/syscall_table_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d44395ff34c..e2e86a08f31 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -88,7 +88,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate -- cgit v1.2.3 From 74d96f018673759d04d032c137d132f6447bfb1e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 13 Jan 2009 19:27:09 -0800 Subject: byteorder: make swab.h include asm/swab.h like a regular header Add swab.h to kbuild.asm and remove the individual entries from each arch, mark as unifdef as some arches have some kernel-only bits inside. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/byteorder.h | 1 - 2 files changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a9f8a814a1f..4a8e80cdcfa 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,4 +22,3 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h unifdef-y += vsyscall.h -unifdef-y += swab.h diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index 7c49917e3d9..b13a7a88f3e 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H -#include #include #endif /* _ASM_X86_BYTEORDER_H */ -- cgit v1.2.3 From b5db0e38653bfada34a92f360b4111566ede3842 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Jan 2009 15:32:12 -0800 Subject: Revert "x86 PAT: remove CPA WARN_ON for zero pte" This reverts commit 58dab916dfb57328d50deb0aa9b3fc92efa248ff, which makes my Nehalem come to a nasty crawling almost-halt. It looks like it turns off caching of regular kernel RAM, with the understandable slowdown of a few orders of magnitude as a result. Acked-by: Ingo Molnar Cc: Yinghai Lu Cc: Peter Anvin Cc: Venkatesh Pallipadi Cc: Suresh Siddha Signed-off-by: Linus Torvalds --- arch/x86/mm/pageattr.c | 10 ++++------ arch/x86/mm/pat.c | 45 +++++++++++++-------------------------------- 2 files changed, 17 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4cf30dee816..e89d24815f2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -555,12 +555,10 @@ repeat: if (!pte_val(old_pte)) { if (!primary) return 0; - - /* - * Special error value returned, indicating that the mapping - * did not exist at this address. - */ - return -EFAULT; + WARN(1, KERN_WARNING "CPA: called for zero pte. " + "vaddr = %lx cpa->vaddr = %lx\n", address, + *cpa->vaddr); + return -EINVAL; } if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 160c42d3eb8..8b08fb95527 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -505,35 +505,6 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } #endif /* CONFIG_STRICT_DEVMEM */ -/* - * Change the memory type for the physial address range in kernel identity - * mapping space if that range is a part of identity map. 
- */ -static int kernel_map_sync_memtype(u64 base, unsigned long size, - unsigned long flags) -{ - unsigned long id_sz; - int ret; - - if (!pat_enabled || base >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < base + size) ? - __pa(high_memory) - base : - size; - - ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags); - /* - * -EFAULT return means that the addr was not valid and did not have - * any identity mapping. That case is a success for - * kernel_map_sync_memtype. - */ - if (ret == -EFAULT) - ret = 0; - - return ret; -} - int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { @@ -584,7 +555,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (retval < 0) return 0; - if (kernel_map_sync_memtype(offset, size, flags)) { + if (((pfn < max_low_pfn_mapped) || + (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -632,7 +605,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, int strict_prot) { int is_ram = 0; - int ret; + int id_sz, ret; unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); @@ -673,7 +646,15 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, flags); } - if (kernel_map_sync_memtype(paddr, size, flags)) { + /* Need to keep identity mapping in sync */ + if (paddr >= __pa(high_memory)) + return 0; + + id_sz = (__pa(high_memory) < paddr + size) ? + __pa(high_memory) - paddr : + size; + + if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d reserve_pfn_range ioremap_change_attr failed %s " -- cgit v1.2.3 From 5a4ccaf37ffece09ef33f1cfec67efa8ee56f967 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 21:15:32 +0100 Subject: kprobes: check CONFIG_FREEZER instead of CONFIG_PM Check CONFIG_FREEZER instead of CONFIG_PM because kprobe booster depends on freeze_processes() and thaw_processes() when CONFIG_PREEMPT=y. This fixes a linkage error which occurs when CONFIG_PREEMPT=y, CONFIG_PM=y and CONFIG_FREEZER=n. Reported-by: Cheng Renquan Signed-off-by: Masami Hiramatsu Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar Signed-off-by: Len Brown --- arch/x86/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 884d985b8b8..e948b28a5a9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -446,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); -- cgit v1.2.3