From 70ef56414ec7e01d787c8e959bb259845df4ee4f Mon Sep 17 00:00:00 2001
From: Jaswinder Singh
Date: Wed, 23 Jul 2008 17:36:37 +0530
Subject: x86: mm/fault.c declare do_page_fault before they get used

Declare do_page_fault() in asm-x86/traps.h for both X86_32 and
X86_64, and remove the do_invalid_op() declaration from mm/fault.c,
as it is already declared in asm-x86/traps.h.

Signed-off-by: Jaswinder Singh
---
 arch/x86/mm/fault.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b4..8f92cac4e6d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include <asm/traps.h>
 
 /*
  * Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG
--
cgit v1.2.3

From cc643d4687533345fd8ebcba836f9ee25df7c458 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 29 Aug 2008 12:53:45 +0100
Subject: x86: adjust vmalloc_sync_all() for Xen (2nd try)

Since the fourth PDPT entry cannot be shared under Xen,
vmalloc_sync_all() must iterate over pmd-s rather than pgd-s here.
Luckily, the code isn't used for native PAE (SHARED_KERNEL_PMD is 1)
and the change is benign to non-PAE.

Also do a little more cleanup in that function.

Signed-off-by: Jan Beulich
Signed-off-by: Ingo Molnar
Cc: Jeremy Fitzhardinge
---
 arch/x86/mm/fault.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 455f3fe67b4..356ed2dec3a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -915,15 +915,15 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
-#ifdef CONFIG_X86_32
-	unsigned long start = VMALLOC_START & PGDIR_MASK;
 	unsigned long address;
 
+#ifdef CONFIG_X86_32
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
-	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+	for (address = VMALLOC_START & PMD_MASK;
+	     address >= TASK_SIZE && address < FIXADDR_TOP;
+	     address += PMD_SIZE) {
 		unsigned long flags;
 		struct page *page;
 
@@ -936,10 +936,8 @@ void vmalloc_sync_all(void)
 		spin_unlock_irqrestore(&pgd_lock, flags);
 	}
 #else /* CONFIG_X86_64 */
-	unsigned long start = VMALLOC_START & PGDIR_MASK;
-	unsigned long address;
-
-	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+	     address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
 		unsigned long flags;
 		struct page *page;
--
cgit v1.2.3

From 69c89b5bf7f253756f3056e84b8603abe1c50f5b Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum
Date: Fri, 26 Sep 2008 14:03:07 +0200
Subject: traps: x86: remove trace_hardirqs_fixup from pagefault handler

The last use of trace_hardirqs_fixup is unnecessary: the trap is
taken with interrupts off on i386 as well as x86_64, and the
irq-tracer is notified of this from the assembly code.

trace_hardirqs_fixup and trace_hardirqs_fixup_flags are removed from
include/asm-x86/irqflags.h as they are no longer used.
Signed-off-by: Alexander van Heukelum
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/fault.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b..3f2b8962cbd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -592,11 +592,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	unsigned long flags;
 #endif
 
-	/*
-	 * We can fault from pretty much anywhere, with unknown IRQ state.
-	 */
-	trace_hardirqs_fixup();
-
 	tsk = current;
 	mm = tsk->mm;
 	prefetchw(&mm->mmap_sem);
--
cgit v1.2.3

From 891cffbd6bcba26409869c19c07ecd4bfc0c2460 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 12 Oct 2008 13:16:12 -0700
Subject: x86/mm: do not trigger a kernel warning if user-space disables
 interrupts and generates a page fault

Arjan reported a spike in the following bug pattern in v2.6.27:

  http://www.kerneloops.org/searchweek.php?search=lock_page

which happens because hwclock started triggering warnings due to a
(correct) might_sleep() check in the MM code.

The warning occurs because hwclock uses this dubious sequence of code
to run "atomic" code:

  static unsigned long
  atomic(const char *name, unsigned long (*op)(unsigned long),
         unsigned long arg)
  {
	unsigned long v;
	__asm__ volatile ("cli");
	v = (*op)(arg);
	__asm__ volatile ("sti");
	return v;
  }

Then it pagefaults in that "atomic" section, triggering the warning.

There is no way the kernel could provide "atomicity" in this path: a
page fault is a cannot-continue machine event, so the kernel has to
wait for the page to be filled in.

Even if it were just a minor fault, we would have to take locks and
might have to spend quite a bit of time with interrupts disabled -
not nice for irq latencies in general.

So instead just enable interrupts in the pagefault path
unconditionally if we come from user space, and handle the fault.

Also, while touching this code, unify some trivial parts of the x86
VM paths at the same time.

Signed-off-by: Linus Torvalds
Reported-by: Arjan van de Ven
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/fault.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a742d753d5b..ac2ad781da0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -645,24 +645,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
-#ifdef CONFIG_X86_32
-	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
-	   fault has been handled. */
-	if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
-		local_irq_enable();
-
 	/*
-	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault.
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet.
 	 */
-	if (in_atomic() || !mm)
-		goto bad_area_nosemaphore;
-#else /* CONFIG_X86_64 */
-	if (likely(regs->flags & X86_EFLAGS_IF))
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+	} else if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 
+#ifdef CONFIG_X86_64
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(address, regs, error_code);
+#endif
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
@@ -671,14 +670,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-	/*
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet.
-	 */
-	if (user_mode_vm(regs))
-		error_code |= PF_USER;
 again:
-#endif
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
 	 * kernel and should generate an OOPS. Unfortunately, in the case of an
--
cgit v1.2.3

From 3a1dfe6eefe483589c99c909202ffe1a20d589b5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 13 Oct 2008 17:49:02 +0200
Subject: x86/mm: unify init task OOM handling

Linus noticed that the "again:" versus "survive:" OOM logic for the
init task was arbitrarily different.

The 64-bit codepath is the better one, because it correctly re-looks
up the vma after having dropped the ->mmap_sem.

Signed-off-by: Ingo Molnar
Acked-by: Linus Torvalds
---
 arch/x86/mm/fault.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ac2ad781da0..8bc5956e1af 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -671,7 +671,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto bad_area_nosemaphore;
 
 again:
-	/* When running in the kernel we expect faults to occur only to
+	/*
+	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
 	 * kernel and should generate an OOPS. Unfortunately, in the case of an
 	 * erroneous fault occurring in a code path which already holds mmap_sem
@@ -734,9 +735,6 @@ good_area:
 			goto bad_area;
 	}
 
-#ifdef CONFIG_X86_32
-survive:
-#endif
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
@@ -871,12 +869,11 @@ out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (is_global_init(tsk)) {
 		yield();
-#ifdef CONFIG_X86_32
-		down_read(&mm->mmap_sem);
-		goto survive;
-#else
+		/*
+		 * Re-lookup the vma - in theory the vma tree might
+		 * have changed:
+		 */
 		goto again;
-#endif
 	}
 
 	printk("VM: killing process %s\n", tsk->comm);
--
cgit v1.2.3
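
Taken together, the last two patches leave do_page_fault() with a single
interrupt-enabling entry sequence shared by 32-bit and 64-bit. The sketch
below is a minimal, stand-alone illustration of that control flow, not the
kernel source: struct pt_regs is reduced to the two values consulted here,
and user_mode_vm()/local_irq_enable() are stand-in stubs.

  #include <stdbool.h>
  #include <stdio.h>

  #define X86_EFLAGS_IF	0x00000200UL	/* IF bit in saved EFLAGS */
  #define PF_USER	(1UL << 2)	/* error-code bit: fault from user mode */

  /* Reduced stand-in for the kernel's struct pt_regs. */
  struct pt_regs {
	unsigned long	flags;	/* EFLAGS at the time of the fault */
	bool		user;	/* stand-in for the CS/VM86 user-mode checks */
  };

  /* Illustrative stubs, not the kernel implementations. */
  static bool user_mode_vm(const struct pt_regs *regs) { return regs->user; }
  static void local_irq_enable(void) { puts("interrupts re-enabled"); }

  /*
   * Unified entry logic after the patches above: a fault from user
   * space always re-enables interrupts (even if the task cleared IF
   * via iopl()/cli, as hwclock does) and is tagged PF_USER; a kernel
   * fault re-enables them only if they were on when the fault hit.
   */
  static void fault_entry(struct pt_regs *regs, unsigned long *error_code)
  {
	if (user_mode_vm(regs)) {
		local_irq_enable();
		*error_code |= PF_USER;
	} else if (regs->flags & X86_EFLAGS_IF) {
		local_irq_enable();
	}
  }

  int main(void)
  {
	/* The hwclock case: user mode, IF cleared in the saved flags. */
	struct pt_regs regs = { .flags = 0, .user = true };
	unsigned long error_code = 0;

	fault_entry(&regs, &error_code);
	printf("PF_USER set: %d\n", !!(error_code & PF_USER));
	return 0;
  }

The first branch is the behavioural change from Linus's patch: for a fault
coming from user space, the saved IF flag, which user space can influence
via iopl()/cli, is no longer consulted at all; interrupts are simply
re-enabled and the access is tagged as a user access.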