From 70ef56414ec7e01d787c8e959bb259845df4ee4f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh
Date: Wed, 23 Jul 2008 17:36:37 +0530
Subject: x86: mm/fault.c declare do_page_fault before they get used

Declare do_page_fault() in asm-x86/traps.h for both X86_32 and
X86_64, and remove the do_invalid_op() declaration from mm/fault.c,
as it is already declared in asm-x86/traps.h.

Signed-off-by: Jaswinder Singh
---
 arch/x86/mm/fault.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b4..8f92cac4e6d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <asm/traps.h>
 
 /*
  * Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG
--
cgit v1.2.3

From cc643d4687533345fd8ebcba836f9ee25df7c458 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 29 Aug 2008 12:53:45 +0100
Subject: x86: adjust vmalloc_sync_all() for Xen (2nd try)

Since the fourth PDPT entry cannot be shared under Xen,
vmalloc_sync_all() must iterate over pmd-s rather than pgd-s here.
Luckily, the code isn't used for native PAE (SHARED_KERNEL_PMD is 1)
and the change is benign to non-PAE.

Also do a little more cleanup in that function.

Signed-off-by: Jan Beulich
Signed-off-by: Ingo Molnar
Cc: Jeremy Fitzhardinge
---
 arch/x86/mm/fault.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b4..356ed2dec3a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -915,15 +915,15 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
-#ifdef CONFIG_X86_32
-	unsigned long start = VMALLOC_START & PGDIR_MASK;
 	unsigned long address;
 
+#ifdef CONFIG_X86_32
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+	for (address = VMALLOC_START & PMD_MASK;
+	     address >= TASK_SIZE && address < FIXADDR_TOP;
+	     address += PMD_SIZE) {
 		unsigned long flags;
 		struct page *page;
 
@@ -936,10 +936,8 @@ void vmalloc_sync_all(void)
 		spin_unlock_irqrestore(&pgd_lock, flags);
 	}
 #else /* CONFIG_X86_64 */
-	unsigned long start = VMALLOC_START & PGDIR_MASK;
-	unsigned long address;
-
-	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+	     address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		unsigned long flags;
 		struct page *page;
--
cgit v1.2.3

From 69c89b5bf7f253756f3056e84b8603abe1c50f5b Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum
Date: Fri, 26 Sep 2008 14:03:07 +0200
Subject: traps: x86: remove trace_hardirqs_fixup from pagefault handler

The last use of trace_hardirqs_fixup is unnecessary: the trap is
taken with interrupts off on i386 as well as x86_64, and the
irq-tracer is notified of this from the assembly code.

trace_hardirqs_fixup and trace_hardirqs_fixup_flags are removed from
include/asm-x86/irqflags.h as they are no longer used.
Signed-off-by: Alexander van Heukelum
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/fault.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b..3f2b8962cbd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -592,11 +592,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	unsigned long flags;
 #endif
 
-	/*
-	 * We can fault from pretty much anywhere, with unknown IRQ state.
-	 */
-	trace_hardirqs_fixup();
-
 	tsk = current;
 	mm = tsk->mm;
 	prefetchw(&mm->mmap_sem);
--
cgit v1.2.3

From 891cffbd6bcba26409869c19c07ecd4bfc0c2460 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 12 Oct 2008 13:16:12 -0700
Subject: x86/mm: do not trigger a kernel warning if user-space disables
 interrupts and generates a page fault

Arjan reported a spike in the following bug pattern in v2.6.27:

  http://www.kerneloops.org/searchweek.php?search=lock_page

which happens because hwclock started triggering warnings due to a
(correct) might_sleep() check in the MM code.

The warning occurs because hwclock uses this dubious sequence of code
to run "atomic" code:

  static unsigned long
  atomic(const char *name, unsigned long (*op)(unsigned long),
         unsigned long arg)
  {
	unsigned long v;
	__asm__ volatile ("cli");
	v = (*op)(arg);
	__asm__ volatile ("sti");
	return v;
  }

Then it pagefaults in that "atomic" section, triggering the warning.

There is no way the kernel could provide "atomicity" in this path: a
page fault is a cannot-continue machine event, so the kernel has to
wait for the page to be filled in.

Even if it were just a minor fault, we would have to take locks and
might have to spend quite a bit of time with interrupts disabled -
not nice for irq latencies in general.

So instead just enable interrupts in the pagefault path
unconditionally if we come from user space, and handle the fault.

Also, while touching this code, unify some trivial parts of the x86
VM paths at the same time.

Signed-off-by: Linus Torvalds
Reported-by: Arjan van de Ven
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/fault.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b..ac2ad781da0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -645,24 +645,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
-#ifdef CONFIG_X86_32
-	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
-	   fault has been handled. */
-	if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
-		local_irq_enable();
-
 	/*
-	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault.
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet.
 	 */
-	if (in_atomic() || !mm)
-		goto bad_area_nosemaphore;
-#else /* CONFIG_X86_64 */
-	if (likely(regs->flags & X86_EFLAGS_IF))
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+	} else if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 
+#ifdef CONFIG_X86_64
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(address, regs, error_code);
+#endif
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
@@ -671,14 +670,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-	/*
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet.
-	 */
-	if (user_mode_vm(regs))
-		error_code |= PF_USER;
 again:
-#endif
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
 	 * kernel and should generate an OOPS. Unfortunately, in the case of an
--
cgit v1.2.3

From 3a1dfe6eefe483589c99c909202ffe1a20d589b5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 13 Oct 2008 17:49:02 +0200
Subject: x86/mm: unify init task OOM handling

Linus noticed that the "again:" versus "survive:" OOM logic for the
init task was arbitrarily different.

The 64-bit codepath is the better one, because it correctly re-looks
up the vma after having dropped the ->mmap_sem.

Signed-off-by: Ingo Molnar
Acked-by: Linus Torvalds
---
 arch/x86/mm/fault.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ac2ad781da0..8bc5956e1af 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -671,7 +671,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto bad_area_nosemaphore;
 
 again:
-	/* When running in the kernel we expect faults to occur only to
+	/*
+	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
 	 * kernel and should generate an OOPS. Unfortunately, in the case of an
 	 * erroneous fault occurring in a code path which already holds mmap_sem
@@ -734,9 +735,6 @@ good_area:
 			goto bad_area;
 	}
 
-#ifdef CONFIG_X86_32
-survive:
-#endif
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -871,12 +869,11 @@ out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (is_global_init(tsk)) {
 		yield();
-#ifdef CONFIG_X86_32
-		down_read(&mm->mmap_sem);
-		goto survive;
-#else
+		/*
+		 * Re-lookup the vma - in theory the vma tree might
+		 * have changed:
+		 */
 		goto again;
-#endif
 	}
 
 	printk("VM: killing process %s\n", tsk->comm);
--
cgit v1.2.3
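
Taken together, the last two patches leave do_page_fault() with a single
interrupt-enabling entry sequence shared by 32-bit and 64-bit. The sketch
below is a minimal, stand-alone illustration of that control flow, not the
kernel source: struct pt_regs is reduced to the two values consulted here,
and user_mode_vm()/local_irq_enable() are stand-in stubs.

  #include <stdbool.h>
  #include <stdio.h>

  #define X86_EFLAGS_IF	0x00000200UL	/* IF bit in saved EFLAGS */
  #define PF_USER	(1UL << 2)	/* error-code bit: fault from user mode */

  /* Reduced stand-in for the kernel's struct pt_regs. */
  struct pt_regs {
	unsigned long	flags;	/* EFLAGS at the time of the fault */
	bool		user;	/* stand-in for the CS/VM86 user-mode checks */
  };

  /* Illustrative stubs, not the kernel implementations. */
  static bool user_mode_vm(const struct pt_regs *regs) { return regs->user; }
  static void local_irq_enable(void) { puts("interrupts re-enabled"); }

  /*
   * Unified entry logic after the patches above: a fault from user
   * space always re-enables interrupts (even if the task cleared IF
   * via iopl()/cli, as hwclock does) and is tagged PF_USER; a kernel
   * fault re-enables them only if they were on when the fault hit.
   */
  static void fault_entry(struct pt_regs *regs, unsigned long *error_code)
  {
	if (user_mode_vm(regs)) {
		local_irq_enable();
		*error_code |= PF_USER;
	} else if (regs->flags & X86_EFLAGS_IF) {
		local_irq_enable();
	}
  }

  int main(void)
  {
	/* The hwclock case: user mode, IF cleared in the saved flags. */
	struct pt_regs regs = { .flags = 0, .user = true };
	unsigned long error_code = 0;

	fault_entry(&regs, &error_code);
	printf("PF_USER set: %d\n", !!(error_code & PF_USER));
	return 0;
  }

The first branch is the behavioural change from Linus's patch: for a fault
coming from user space, the saved IF flag, which user space can influence
via iopl()/cli, is no longer consulted at all; interrupts are simply
re-enabled and the access is tagged as a user access.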