From ae74c3b69a08e1de20cb681ec959f3a48af0006a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Wed, 2 Aug 2006 20:17:49 -0700
Subject: Fix force_sig_info() semantics after cleanups

Suresh points out that commit b0423a0d9cc836b2c3d796623cd19236bfedfe63
broke the semantics of a synchronous signal like SIGSEGV occurring
recursively inside its own handler (or, indeed, any other
context when the signal was blocked).

That was unintentional, and this fixes things up by reinstating the old
semantics, but without reverting the cleanups.

Cc: Paul E. McKenney <paulmck@us.ibm.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/signal.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 7fe874d12fa..bfdb5686fa3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -791,22 +791,31 @@ out:
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
+ *
+ * Note: If we unblock the signal, we always reset it to SIG_DFL,
+ * since we do not want to have a signal handler that was blocked
+ * be invoked when user space had explicitly blocked it.
+ *
+ * We don't want to have recursive SIGSEGV's etc, for example.
  */
-
 int
 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 {
 	unsigned long int flags;
-	int ret;
+	int ret, blocked, ignored;
+	struct k_sigaction *action;
 
 	spin_lock_irqsave(&t->sighand->siglock, flags);
-	if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) {
-		t->sighand->action[sig-1].sa.sa_handler = SIG_DFL;
-	}
-	if (sigismember(&t->blocked, sig)) {
-		sigdelset(&t->blocked, sig);
+	action = &t->sighand->action[sig-1];
+	ignored = action->sa.sa_handler == SIG_IGN;
+	blocked = sigismember(&t->blocked, sig);
+	if (blocked || ignored) {
+		action->sa.sa_handler = SIG_DFL;
+		if (blocked) {
+			sigdelset(&t->blocked, sig);
+			recalc_sigpending_tsk(t);
+		}
 	}
-	recalc_sigpending_tsk(t);
 	ret = specific_send_sig_info(sig, info, t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 
-- 
cgit v1.2.3


From 3e2efce067cec0099f99ae59f28feda99b02b498 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 13 Jul 2006 13:16:02 -0400
Subject: [PATCH] fix faulty inode data collection for open() with O_CREAT

When the specified path is an existing file or when it is a symlink, audit
collects the wrong inode number, which causes it to miss the open() event.
Adding a second hook to the open() path fixes this.

Also add audit_copy_inode() to consolidate some code.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditsc.c | 63 ++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ae40ac8c39e..b939ed2da3e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1199,14 +1199,18 @@ void audit_putname(const char *name)
 #endif
 }
 
-static void audit_inode_context(int idx, const struct inode *inode)
+/* Copy inode data into an audit_names. */
+static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
 {
-	struct audit_context *context = current->audit_context;
-
-	selinux_get_inode_sid(inode, &context->names[idx].osid);
+	name->ino   = inode->i_ino;
+	name->dev   = inode->i_sb->s_dev;
+	name->mode  = inode->i_mode;
+	name->uid   = inode->i_uid;
+	name->gid   = inode->i_gid;
+	name->rdev  = inode->i_rdev;
+	selinux_get_inode_sid(inode, &name->osid);
 }
 
-
 /**
  * audit_inode - store the inode and device from a lookup
  * @name: name being audited
@@ -1240,13 +1244,7 @@ void __audit_inode(const char *name, const struct inode *inode)
 		++context->ino_count;
 #endif
 	}
-	context->names[idx].ino   = inode->i_ino;
-	context->names[idx].dev	  = inode->i_sb->s_dev;
-	context->names[idx].mode  = inode->i_mode;
-	context->names[idx].uid   = inode->i_uid;
-	context->names[idx].gid   = inode->i_gid;
-	context->names[idx].rdev  = inode->i_rdev;
-	audit_inode_context(idx, inode);
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
@@ -1302,16 +1300,37 @@ update_context:
 	context->names[idx].name_len = AUDIT_NAME_FULL;
 	context->names[idx].name_put = 0;	/* don't call __putname() */
 
-	if (inode) {
-		context->names[idx].ino   = inode->i_ino;
-		context->names[idx].dev	  = inode->i_sb->s_dev;
-		context->names[idx].mode  = inode->i_mode;
-		context->names[idx].uid   = inode->i_uid;
-		context->names[idx].gid   = inode->i_gid;
-		context->names[idx].rdev  = inode->i_rdev;
-		audit_inode_context(idx, inode);
-	} else
-		context->names[idx].ino   = (unsigned long)-1;
+	if (!inode)
+		context->names[idx].ino = (unsigned long)-1;
+	else
+		audit_copy_inode(&context->names[idx], inode);
+}
+
+/**
+ * audit_inode_update - update inode info for last collected name
+ * @inode: inode being audited
+ *
+ * When open() is called on an existing object with the O_CREAT flag, the inode
+ * data audit initially collects is incorrect.  This additional hook ensures
+ * audit has the inode data for the actual object to be opened.
+ */
+void __audit_inode_update(const struct inode *inode)
+{
+	struct audit_context *context = current->audit_context;
+	int idx;
+
+	if (!context->in_syscall || !inode)
+		return;
+
+	if (context->name_count == 0) {
+		context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+	}
+	idx = context->name_count - 1;
+
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
-- 
cgit v1.2.3


From 73d3ec5abad3f1730ac8530899d2c14d92f3ad63 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 13 Jul 2006 13:16:39 -0400
Subject: [PATCH] fix missed create event for directory audit

When an object is created via a symlink into an audited directory, audit misses
the event due to not having collected the inode data for the directory.  Modify
__audit_inode_child() to copy the parent inode data if a parent wasn't found in
audit_names[].

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditsc.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b939ed2da3e..b1356fc63b2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1251,7 +1251,7 @@ void __audit_inode(const char *name, const struct inode *inode)
  * audit_inode_child - collect inode info for created/removed objects
  * @dname: inode's dentry name
  * @inode: inode being audited
- * @pino: inode number of dentry parent
+ * @parent: inode of dentry parent
  *
  * For syscalls that create or remove filesystem objects, audit_inode
  * can only collect information for the filesystem object's parent.
@@ -1262,7 +1262,7 @@ void __audit_inode(const char *name, const struct inode *inode)
  * unsuccessful attempts.
  */
 void __audit_inode_child(const char *dname, const struct inode *inode,
-			 unsigned long pino)
+			 const struct inode *parent)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
@@ -1276,7 +1276,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 	if (!dname)
 		goto update_context;
 	for (idx = 0; idx < context->name_count; idx++)
-		if (context->names[idx].ino == pino) {
+		if (context->names[idx].ino == parent->i_ino) {
 			const char *name = context->names[idx].name;
 
 			if (!name)
@@ -1304,6 +1304,16 @@ update_context:
 		context->names[idx].ino = (unsigned long)-1;
 	else
 		audit_copy_inode(&context->names[idx], inode);
+
+	/* A parent was not found in audit_names, so copy the inode data for the
+	 * provided parent. */
+	if (!found_name) {
+		idx = context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+		audit_copy_inode(&context->names[idx], parent);
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 6988434ee5f532c71be3131fba23283f5cf43847 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 13 Jul 2006 13:17:12 -0400
Subject: [PATCH] fix oops with CONFIG_AUDIT and !CONFIG_AUDITSYSCALL

Always initialize the audit_inode_hash[] so we don't oops on list rules.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/audit.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index d417ca1db79..0a36091ed71 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -690,9 +690,7 @@ static const struct inotify_operations audit_inotify_ops = {
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
-#ifdef CONFIG_AUDITSYSCALL
 	int i;
-#endif
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
@@ -717,10 +715,10 @@ static int __init audit_init(void)
 	audit_ih = inotify_init(&audit_inotify_ops);
 	if (IS_ERR(audit_ih))
 		audit_panic("cannot initialize inotify handle");
+#endif
 
 	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
 		INIT_LIST_HEAD(&audit_inode_hash[i]);
-#endif
 
 	return 0;
 }
-- 
cgit v1.2.3


From 5422e01ac16df7398b2bad1eccad0ae3be4dee32 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Tue, 1 Aug 2006 17:52:26 -0400
Subject: [PATCH] fix audit oops with invalid operator

Michael C Thompson wrote:  [Tue Aug 01 2006, 02:36:36PM EDT]
> The trigger for this oops is:
> # auditctl -a exit,always -S pread64 -F 'inode<1'

Setting the err value will fix it.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditfilter.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5b4e16276ca..32420f91402 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -442,6 +442,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -579,6 +580,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
-- 
cgit v1.2.3


From 471a5c7c839114cc8b55876203aeb2817c33e3c5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 10 Jul 2006 08:29:24 -0400
Subject: [PATCH] introduce audit rules counter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditfilter.c | 24 ++++++++++++++++++++++++
 kernel/auditsc.c     |  3 +++
 2 files changed, 27 insertions(+)

(limited to 'kernel')

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 32420f91402..6a9a5c5a4e7 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1136,6 +1136,14 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	struct audit_watch *watch = entry->rule.watch;
 	struct nameidata *ndp, *ndw;
 	int h, err, putnd_needed = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+		entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1176,6 +1184,10 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	} else {
 		list_add_tail_rcu(&entry->list, list);
 	}
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules++;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (putnd_needed)
@@ -1200,6 +1212,14 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
 	LIST_HEAD(inotify_list);
 	int h, ret = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+		entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1237,6 +1257,10 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	list_del_rcu(&e->list);
 	call_rcu(&e->rcu, audit_free_rule_rcu);
 
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules--;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (!list_empty(&inotify_list))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b1356fc63b2..3ea836d3d94 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -85,6 +85,9 @@ extern int audit_enabled;
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+/* number of audit rules */
+int audit_n_rules;
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
-- 
cgit v1.2.3


From d51374adf5f2f88155a072d3d801104e3c0c3d7f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Aug 2006 10:59:26 -0400
Subject: [PATCH] mark context of syscall entered with no rules as dummy

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditsc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3ea836d3d94..9618d150725 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -177,6 +177,7 @@ struct audit_aux_data_path {
 
 /* The per-task audit context. */
 struct audit_context {
+	int		    dummy;	/* must be the first element */
 	int		    in_syscall;	/* 1 if task is in a syscall */
 	enum audit_state    state;
 	unsigned int	    serial;     /* serial number for record */
@@ -517,7 +518,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	context->return_valid = return_valid;
 	context->return_code  = return_code;
 
-	if (context->in_syscall && !context->auditable) {
+	if (context->in_syscall && !context->dummy && !context->auditable) {
 		enum audit_state state;
 
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
@@ -1069,7 +1070,8 @@ void audit_syscall_entry(int arch, int major,
 	context->argv[3]    = a4;
 
 	state = context->state;
-	if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT)
+	context->dummy = !audit_n_rules;
+	if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT))
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
 	if (likely(state == AUDIT_DISABLED))
 		return;
-- 
cgit v1.2.3


From 5ac3a9c26c1cc4861d9cdd8b293fecbfcdc81afe Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 16 Jul 2006 06:38:45 -0400
Subject: [PATCH] don't bother with aux entries for dummy context

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditsc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9618d150725..f571c7e925e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1676,7 +1676,7 @@ int audit_bprm(struct linux_binprm *bprm)
 	unsigned long p, next;
 	void *to;
 
-	if (likely(!audit_enabled || !context))
+	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
@@ -1714,7 +1714,7 @@ int audit_socketcall(int nargs, unsigned long *args)
 	struct audit_aux_data_socketcall *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
@@ -1742,7 +1742,7 @@ int audit_sockaddr(int len, void *a)
 	struct audit_aux_data_sockaddr *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL);
-- 
cgit v1.2.3


From 3f2792ffbd88dc1cd41d226674cc428914981e98 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 16 Jul 2006 06:43:48 -0400
Subject: [PATCH] take filling ->pid, etc. out of audit_get_context()

move that stuff downstream and into the only branch where it'll be
used.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditsc.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f571c7e925e..efc1b74bebf 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -534,17 +534,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	}
 
 get_context:
-	context->pid = tsk->pid;
-	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
-	context->uid = tsk->uid;
-	context->gid = tsk->gid;
-	context->euid = tsk->euid;
-	context->suid = tsk->suid;
-	context->fsuid = tsk->fsuid;
-	context->egid = tsk->egid;
-	context->sgid = tsk->sgid;
-	context->fsgid = tsk->fsgid;
-	context->personality = tsk->personality;
+
 	tsk->audit_context = NULL;
 	return context;
 }
@@ -753,6 +743,17 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	const char *tty;
 
 	/* tsk == current */
+	context->pid = tsk->pid;
+	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
+	context->uid = tsk->uid;
+	context->gid = tsk->gid;
+	context->euid = tsk->euid;
+	context->suid = tsk->suid;
+	context->fsuid = tsk->fsuid;
+	context->egid = tsk->egid;
+	context->sgid = tsk->sgid;
+	context->fsgid = tsk->fsgid;
+	context->personality = tsk->personality;
 
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
 	if (!ab)
-- 
cgit v1.2.3


From a7ef7878ea7c8bca9b624db3f61223cdadda2a0a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 5 Aug 2006 12:13:42 -0700
Subject: [PATCH] Make suspend possible with a traced process at a breakpoint

It should be possible to suspend, either to RAM or to disk, if there's a
traced process that has just reached a breakpoint.  However, this is a
special case, because its parent process might have been frozen already and
then we are unable to deliver the "freeze" signal to the traced process.
If this happens, it's better to cancel the freezing of the traced process.

Ref. http://bugzilla.kernel.org/show_bug.cgi?id=6787

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/process.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index b2a5f671d6c..72e72d2c61e 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -66,13 +66,25 @@ static inline void freeze_process(struct task_struct *p)
 	}
 }
 
+static void cancel_freezing(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (freezing(p)) {
+		pr_debug("  clean up: %s\n", p->comm);
+		do_not_freeze(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		recalc_sigpending_tsk(p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
 	int todo, nr_user, user_frozen;
 	unsigned long start_time;
 	struct task_struct *g, *p;
-	unsigned long flags;
 
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
@@ -85,6 +97,10 @@ int freeze_processes(void)
 				continue;
 			if (frozen(p))
 				continue;
+			if (p->state == TASK_TRACED && frozen(p->parent)) {
+				cancel_freezing(p);
+				continue;
+			}
 			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
 				/* The task is a user-space one.
 				 * Freeze it unless there's a vfork completion
@@ -126,13 +142,7 @@ int freeze_processes(void)
 		do_each_thread(g, p) {
 			if (freezeable(p) && !frozen(p))
 				printk(KERN_ERR "  %s\n", p->comm);
-			if (freezing(p)) {
-				pr_debug("  clean up: %s\n", p->comm);
-				p->flags &= ~PF_FREEZE;
-				spin_lock_irqsave(&p->sighand->siglock, flags);
-				recalc_sigpending_tsk(p);
-				spin_unlock_irqrestore(&p->sighand->siglock, flags);
-			}
+			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
-- 
cgit v1.2.3


From e91467ecd1ef381377fd327c0ded922835ec52ab Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntrae@de.ibm.com>
Date: Sat, 5 Aug 2006 12:13:52 -0700
Subject: [PATCH] bug in futex unqueue_me

This patch adds a barrier() in futex unqueue_me to avoid aliasing of two
pointers.

On my s390x system I saw the following oops:

Unable to handle kernel pointer dereference at virtual kernel address
0000000000000000
Oops: 0004 [#1]
CPU:    0    Not tainted
Process mytool (pid: 13613, task: 000000003ecb6ac0, ksp: 00000000366bdbd8)
Krnl PSW : 0704d00180000000 00000000003c9ac2 (_spin_lock+0xe/0x30)
Krnl GPRS: 00000000ffffffff 000000003ecb6ac0 0000000000000000 0700000000000000
           0000000000000000 0000000000000000 000001fe00002028 00000000000c091f
           000001fe00002054 000001fe00002054 0000000000000000 00000000366bddc0
           00000000005ef8c0 00000000003d00e8 0000000000144f91 00000000366bdcb8
Krnl Code: ba 4e 20 00 12 44 b9 16 00 3e a7 84 00 08 e3 e0 f0 88 00 04
Call Trace:
([<0000000000144f90>] unqueue_me+0x40/0xe4)
 [<0000000000145a0c>] do_futex+0x33c/0xc40
 [<000000000014643e>] sys_futex+0x12e/0x144
 [<000000000010bb00>] sysc_noemu+0x10/0x16
 [<000002000003741c>] 0x2000003741c

The code in question is:

static int unqueue_me(struct futex_q *q)
{
        int ret = 0;
        spinlock_t *lock_ptr;

        /* In the common case we don't take the spinlock, which is nice. */
 retry:
        lock_ptr = q->lock_ptr;
        if (lock_ptr != 0) {
                spin_lock(lock_ptr);
		/*
                 * q->lock_ptr can change between reading it and
                 * spin_lock(), causing us to take the wrong lock.  This
                 * corrects the race condition.
[...]

and my compiler (gcc 4.1.0) makes the following out of it:

00000000000003c8 <unqueue_me>:
     3c8:       eb bf f0 70 00 24       stmg    %r11,%r15,112(%r15)
     3ce:       c0 d0 00 00 00 00       larl    %r13,3ce <unqueue_me+0x6>
                        3d0: R_390_PC32DBL      .rodata+0x2a
     3d4:       a7 f1 1e 00             tml     %r15,7680
     3d8:       a7 84 00 01             je      3da <unqueue_me+0x12>
     3dc:       b9 04 00 ef             lgr     %r14,%r15
     3e0:       a7 fb ff d0             aghi    %r15,-48
     3e4:       b9 04 00 b2             lgr     %r11,%r2
     3e8:       e3 e0 f0 98 00 24       stg     %r14,152(%r15)
     3ee:       e3 c0 b0 28 00 04       lg      %r12,40(%r11)
		/* write q->lock_ptr in r12 */
     3f4:       b9 02 00 cc             ltgr    %r12,%r12
     3f8:       a7 84 00 4b             je      48e <unqueue_me+0xc6>
		/* if r12 is zero then jump over the code.... */
     3fc:       e3 20 b0 28 00 04       lg      %r2,40(%r11)
		/* write q->lock_ptr in r2 */
     402:       c0 e5 00 00 00 00       brasl   %r14,402 <unqueue_me+0x3a>
                        404: R_390_PC32DBL      _spin_lock+0x2
		/* use r2 as parameter for spin_lock */

So the code becomes more or less:
if (q->lock_ptr != 0) spin_lock(q->lock_ptr)
instead of
if (lock_ptr != 0) spin_lock(lock_ptr)

Which caused the oops from above.
After adding a barrier gcc creates code without this problem:
[...] (the same)
     3ee:       e3 c0 b0 28 00 04       lg      %r12,40(%r11)
     3f4:       b9 02 00 cc             ltgr    %r12,%r12
     3f8:       b9 04 00 2c             lgr     %r2,%r12
     3fc:       a7 84 00 48             je      48c <unqueue_me+0xc4>
     400:       c0 e5 00 00 00 00       brasl   %r14,400 <unqueue_me+0x38>
                        402: R_390_PC32DBL      _spin_lock+0x2

As a general note, this code of unqueue_me seems a bit fishy. The retry logic
of unqueue_me only works if we can guarantee that the original value of
q->lock_ptr is always a spinlock (otherwise we overwrite kernel memory). We
know that q->lock_ptr can change. I don't know what happens with the original
spinlock, as I am not an expert with the futex code.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@timesys.com>
Signed-off-by: Christian Borntraeger <borntrae@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/futex.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/futex.c b/kernel/futex.c
index dda2049692a..c2b2e0b83ab 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -948,6 +948,7 @@ static int unqueue_me(struct futex_q *q)
 	/* In the common case we don't take the spinlock, which is nice. */
  retry:
 	lock_ptr = q->lock_ptr;
+	barrier();
 	if (lock_ptr != 0) {
 		spin_lock(lock_ptr);
 		/*
-- 
cgit v1.2.3


From 9f59ce5d0e0dd837853385927b150f5cef3a7f52 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <76306.1226@compuserve.com>
Date: Sat, 5 Aug 2006 12:14:11 -0700
Subject: [PATCH] ptrace: make pid of child process available for
 PTRACE_EVENT_VFORK_DONE

When delivering PTRACE_EVENT_VFORK_DONE, provide pid of the child process
when tracer calls ptrace(PTRACE_GETEVENTMSG).  This is already
(accidentally) available when the tracer is tracing VFORK in addition to
VFORK_DONE.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Cc: Daniel Jacobowitz <dan@debian.org>
Cc: Albert Cahalan <acahalan@gmail.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 1b0f7b1e088..aa36c43783c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1387,8 +1387,10 @@ long do_fork(unsigned long clone_flags,
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				current->ptrace_message = nr;
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+			}
 		}
 	} else {
 		free_pid(pid);
-- 
cgit v1.2.3


From 78944e549d36673eb6265a2411574e79c28e23dc Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Sat, 5 Aug 2006 12:14:16 -0700
Subject: [PATCH] vt: printk: Fix framebuffer console triggering might_sleep
 assertion

Reported by: Dave Jones

Whilst printk'ing to both console and serial console, I got this...
(2.6.18rc1)

BUG: sleeping function called from invalid context at kernel/sched.c:4438
in_atomic():0, irqs_disabled():1

Call Trace:
 [<ffffffff80271db8>] show_trace+0xaa/0x23d
 [<ffffffff80271f60>] dump_stack+0x15/0x17
 [<ffffffff8020b9f8>] __might_sleep+0xb2/0xb4
 [<ffffffff8029232e>] __cond_resched+0x15/0x55
 [<ffffffff80267eb8>] cond_resched+0x3b/0x42
 [<ffffffff80268c64>] console_conditional_schedule+0x12/0x14
 [<ffffffff80368159>] fbcon_redraw+0xf6/0x160
 [<ffffffff80369c58>] fbcon_scroll+0x5d9/0xb52
 [<ffffffff803a43c4>] scrup+0x6b/0xd6
 [<ffffffff803a4453>] lf+0x24/0x44
 [<ffffffff803a7ff8>] vt_console_print+0x166/0x23d
 [<ffffffff80295528>] __call_console_drivers+0x65/0x76
 [<ffffffff80295597>] _call_console_drivers+0x5e/0x62
 [<ffffffff80217e3f>] release_console_sem+0x14b/0x232
 [<ffffffff8036acd6>] fb_flashcursor+0x279/0x2a6
 [<ffffffff80251e3f>] run_workqueue+0xa8/0xfb
 [<ffffffff8024e5e0>] worker_thread+0xef/0x122
 [<ffffffff8023660f>] kthread+0x100/0x136
 [<ffffffff8026419e>] child_rip+0x8/0x12

This can occur when release_console_sem() is called but the log
buffer still has contents that need to be flushed. The console drivers
are called while the console_may_schedule flag is still true. The
might_sleep() is triggered when fbcon calls console_conditional_schedule().

Fix by setting console_may_schedule to zero earlier, before the call to the
console drivers.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/printk.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 65ca0688f86..1149365e989 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -799,6 +799,9 @@ void release_console_sem(void)
 		up(&secondary_console_sem);
 		return;
 	}
+
+	console_may_schedule = 0;
+
 	for ( ; ; ) {
 		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
@@ -812,7 +815,6 @@ void release_console_sem(void)
 		local_irq_restore(flags);
 	}
 	console_locked = 0;
-	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
-- 
cgit v1.2.3


From 0f04ab5efbca73ab366a156d96b073d2da35b158 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 5 Aug 2006 12:14:59 -0700
Subject: [PATCH] memory hotadd fixes: change find_next_system_ram's return
 value manner

find_next_system_ram() returns valid memory range which meets requested area,
only used by memory-hot-add.

This function always rewrites the requested resource, even if the returned area
does not fully fit in the requested one.  And sometimes the returned resource is
larger than the requested area.  This annoys the caller.  This patch changes the
returned value to fit in the requested area.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Keith Mannthey <kmannth@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/resource.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/resource.c b/kernel/resource.c
index 0dd3a857579..63e879379db 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -261,8 +261,10 @@ int find_next_system_ram(struct resource *res)
 	if (!p)
 		return -1;
 	/* copy data */
-	res->start = p->start;
-	res->end = p->end;
+	if (res->start < p->start)
+		res->start = p->start;
+	if (res->end > p->end)
+		res->end = p->end;
 	return 0;
 }
 #endif
-- 
cgit v1.2.3


From 58c1b5b079071d82b2f924000b7e8fb5585ce7d8 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Sat, 5 Aug 2006 12:15:01 -0700
Subject: [PATCH] memory hotadd fixes: find_next_system_ram catch range fix

find_next_system_ram() is used to find an available memory resource when
onlining newly added memory.  This patch fixes the following problem.

find_next_system_ram() cannot catch this case.

Resource:      (start)-------------(end)
Section :                (start)-------------(end)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Keith Mannthey <kmannth@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/resource.c b/kernel/resource.c
index 63e879379db..46286434af8 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -244,6 +244,7 @@ int find_next_system_ram(struct resource *res)
 
 	start = res->start;
 	end = res->end;
+	BUG_ON(start >= end);
 
 	read_lock(&resource_lock);
 	for (p = iomem_resource.child; p ; p = p->sibling) {
@@ -254,7 +255,7 @@ int find_next_system_ram(struct resource *res)
 			p = NULL;
 			break;
 		}
-		if (p->start >= start)
+		if ((p->end >= start) && (p->start < end))
 			break;
 	}
 	read_unlock(&resource_lock);
-- 
cgit v1.2.3


From ce2c6b53847afc444c4d0a7a1075c61f499c57a5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 5 Aug 2006 12:15:15 -0700
Subject: [PATCH] futex: Apply recent futex fixes to futex_compat

The recent fixups in futex.c need to be applied to futex_compat.c too.  Fixes
a hang reported by Olaf.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Olaf Hering <olh@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/futex_compat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1aab1a452c..c5cca3f65cb 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -39,7 +39,7 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	compat_uptr_t uentry, upending;
 	compat_long_t futex_offset;
 
@@ -59,10 +59,10 @@ void compat_exit_robust_list(struct task_struct *curr)
 	 * if it exists:
 	 */
 	if (fetch_robust_entry(&upending, &pending,
-			       &head->list_op_pending, &pi))
+			       &head->list_op_pending, &pip))
 		return;
 	if (upending)
-		handle_futex_death((void *)pending + futex_offset, curr, pi);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (compat_ptr(uentry) != &head->list) {
 		/*
-- 
cgit v1.2.3