From 8f17fc20bfb75bcec4cfeda789738979c8338fdc Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 15 Jun 2006 20:11:15 +0400
Subject: [PATCH] check_process_timers: fix possible lockup

If the local timer interrupt happens just after do_exit() sets PF_EXITING
(and before it clears ->it_xxx_expires) run_posix_cpu_timers() will call
check_process_timers() with tasklist_lock + ->siglock held and

	check_process_timers:

		t = tsk;
		do {
			....

			do {
				t = next_thread(t);
			} while (unlikely(t->flags & PF_EXITING));
		} while (t != tsk);

the outer loop will never stop.

Actually, the window is bigger.  Another process can attach the timer
after ->it_xxx_expires was cleared (see the next commit) and the 'if
(PF_EXITING)' check in arm_timer() is racy (see the one after that).

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 520f6c59948..9d9169aa2e2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1173,6 +1173,9 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 		t = tsk;
 		do {
+			if (unlikely(t->flags & PF_EXITING))
+				continue;
+
 			ticks = cputime_add(cputime_add(t->utime, t->stime),
 					    prof_left);
 			if (!cputime_eq(prof_expires, cputime_zero) &&
@@ -1193,11 +1196,7 @@ static void check_process_timers(struct task_struct *tsk,
 					      t->it_sched_expires > sched)) {
 				t->it_sched_expires = sched;
 			}
-
-			do {
-				t = next_thread(t);
-			} while (unlikely(t->flags & PF_EXITING));
-		} while (t != tsk);
+		} while ((t = next_thread(t)) != tsk);
 	}
 }
 
-- 
cgit v1.2.3


From 30f1e3dd8c72abda343bcf415f7d8894a02b4290 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 15 Jun 2006 20:11:43 +0400
Subject: [PATCH] run_posix_cpu_timers: remove a bogus BUG_ON()

do_exit() clears ->it_##clock##_expires, but nothing prevents
another cpu to attach the timer to exiting process after that.
arm_timer() tries to protect against this race, but the check
is racy.

After exit_notify() does 'write_unlock_irq(&tasklist_lock)' and
before do_exit() calls 'schedule() local timer interrupt can find
tsk->exit_state != 0. If that state was EXIT_DEAD (or another cpu
does sys_wait4) interrupted task has ->signal == NULL.

At this moment exiting task has no pending cpu timers, they were
cleanuped in __exit_signal()->posix_cpu_timers_exit{,_group}(),
so we can just return from irq.

John Stultz recently confirmed this bug, see

	http://marc.theaimsgroup.com/?l=linux-kernel&m=115015841413687

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c             |  8 --------
 kernel/posix-cpu-timers.c | 36 ++++++++++++++++++------------------
 2 files changed, 18 insertions(+), 26 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index e95b9328221..e06d0c10a24 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -881,14 +881,6 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	tsk->flags |= PF_EXITING;
 
-	/*
-	 * Make sure we don't try to process any timer firings
-	 * while we are already exiting.
-	 */
- 	tsk->it_virt_expires = cputime_zero;
- 	tsk->it_prof_expires = cputime_zero;
-	tsk->it_sched_expires = 0;
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, current->pid,
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 9d9169aa2e2..4882bf1e094 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1288,30 +1288,30 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 #undef	UNEXPIRED
 
-	BUG_ON(tsk->exit_state);
-
 	/*
 	 * Double-check with locks held.
 	 */
 	read_lock(&tasklist_lock);
-	spin_lock(&tsk->sighand->siglock);
+	if (likely(tsk->signal != NULL)) {
+		spin_lock(&tsk->sighand->siglock);
 
-	/*
-	 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-	 * all the timers that are firing, and put them on the firing list.
-	 */
-	check_thread_timers(tsk, &firing);
-	check_process_timers(tsk, &firing);
+		/*
+		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
+		 * all the timers that are firing, and put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+		check_process_timers(tsk, &firing);
 
-	/*
-	 * We must release these locks before taking any timer's lock.
-	 * There is a potential race with timer deletion here, as the
-	 * siglock now protects our private firing list.  We have set
-	 * the firing flag in each timer, so that a deletion attempt
-	 * that gets the timer lock before we do will give it up and
-	 * spin until we've taken care of that timer below.
-	 */
-	spin_unlock(&tsk->sighand->siglock);
+		/*
+		 * We must release these locks before taking any timer's lock.
+		 * There is a potential race with timer deletion here, as the
+		 * siglock now protects our private firing list.  We have set
+		 * the firing flag in each timer, so that a deletion attempt
+		 * that gets the timer lock before we do will give it up and
+		 * spin until we've taken care of that timer below.
+		 */
+		spin_unlock(&tsk->sighand->siglock);
+	}
 	read_unlock(&tasklist_lock);
 
 	/*
-- 
cgit v1.2.3


From f53ae1dc3429529a58aa538e0a860d713c7079c3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 15 Jun 2006 20:12:02 +0400
Subject: [PATCH] arm_timer: remove a racy and obsolete PF_EXITING check

arm_timer() checks PF_EXITING to prevent BUG_ON(->exit_state)
in run_posix_cpu_timers().

However, for some reason it does so only for CPUCLOCK_PERTHREAD
case (which is imho wrong).

Also, this check is not reliable, PF_EXITING could be set on
another cpu without any locks/barriers just after the check,
so it can't prevent from attaching the timer to the exiting
task.

The previous patch makes this check unneeded.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 4882bf1e094..d38d9ec3276 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 	struct cpu_timer_list *next;
 	unsigned long i;
 
-	if (CPUCLOCK_PERTHREAD(timer->it_clock)	&& (p->flags & PF_EXITING))
-		return;
-
 	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 		p->cpu_timers : p->signal->cpu_timers);
 	head += CPUCLOCK_WHICH(timer->it_clock);
-- 
cgit v1.2.3


From 557240b48e2dc4f6fa878afc3fc767ad745ca7ed Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Mon, 19 Jun 2006 18:16:01 -0700
Subject: Add support for suspending and resuming the whole console subsystem

Trying to suspend/resume with console messages flying all around is
doomed to failure, when the devices that the messages are trying to
go to are being shut down.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/main.c |  2 ++
 kernel/printk.c     | 28 +++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/main.c b/kernel/power/main.c
index a6d9ef46009..0a907f0dc56 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state)
 			goto Thaw;
 	}
 
+	suspend_console();
 	if ((error = device_suspend(PMSG_SUSPEND))) {
 		printk(KERN_ERR "Some devices failed to suspend\n");
 		goto Finish;
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state)
 static void suspend_finish(suspend_state_t state)
 {
 	device_resume();
+	resume_console();
 	thaw_processes();
 	enable_nonboot_cpus();
 	if (pm_ops && pm_ops->finish)
diff --git a/kernel/printk.c b/kernel/printk.c
index c056f332443..19a95561929 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(oops_in_progress);
  * driver system.
  */
 static DECLARE_MUTEX(console_sem);
+static DECLARE_MUTEX(secondary_console_sem);
 struct console *console_drivers;
 /*
  * This is used for debugging the mess that is the VT code by
@@ -76,7 +77,7 @@ struct console *console_drivers;
  * path in the console code where we end up in places I want
  * locked without the console sempahore held
  */
-static int console_locked;
+static int console_locked, console_suspended;
 
 /*
  * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
@@ -697,6 +698,23 @@ int __init add_preferred_console(char *name, int idx, char *options)
 	return 0;
 }
 
+/**
+ * suspend_console - suspend the console subsystem
+ *
+ * This disables printk() while we go into suspend states
+ */
+void suspend_console(void)
+{
+	acquire_console_sem();
+	console_suspended = 1;
+}
+
+void resume_console(void)
+{
+	console_suspended = 0;
+	release_console_sem();
+}
+
 /**
  * acquire_console_sem - lock the console system for exclusive use.
  *
@@ -708,6 +726,10 @@ int __init add_preferred_console(char *name, int idx, char *options)
 void acquire_console_sem(void)
 {
 	BUG_ON(in_interrupt());
+	if (console_suspended) {
+		down(&secondary_console_sem);
+		return;
+	}
 	down(&console_sem);
 	console_locked = 1;
 	console_may_schedule = 1;
@@ -750,6 +772,10 @@ void release_console_sem(void)
 	unsigned long _con_start, _log_end;
 	unsigned long wake_klogd = 0;
 
+	if (console_suspended) {
+		up(&secondary_console_sem);
+		return;
+	}
 	for ( ; ; ) {
 		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
-- 
cgit v1.2.3