Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6

author: Steve French <sfrench@us.ibm.com> 2009-07-16 04:21:39 +0000
committer: Steve French <sfrench@us.ibm.com> 2009-07-16 04:21:39 +0000
commit: f6c43385435640e056424034caac0d765c45e370 (patch)
tree: 54c2ba0a7d3abb156bc51f6b9f02e00f0a260f85 /kernel
parent: 65bc98b0059360e458aebd208587be44641227c1 (diff)
parent: 35b5c55fee08e6e4001ba98060a2d0b82f70b5f4 (diff)
15 files changed, 91 insertions, 89 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 794c862125f..0672ff88f15 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -247,6 +247,7 @@ again:
 	if (err < 0)
 		return err;
 
+	page = compound_head(page);
 	lock_page(page);
 	if (!page->mapping) {
 		unlock_page(page);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958a96e..49da79ab848 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 	}
 }
 
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			return preferred_cpu;
+	}
+#endif
+	return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+	ktime_t expires;
+
+	if (!new_base->cpu_base->hres_active)
+		return 0;
+
+	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+	return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+	return 0;
+#endif
+}
+
 /*
  * Switch the timer base to the current CPU when possible.
  */
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
-	int cpu, preferred_cpu = -1;
-
-	cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-		preferred_cpu = get_nohz_load_balancer();
-		if (preferred_cpu >= 0)
-			cpu = preferred_cpu;
-	}
-#endif
+	int this_cpu = smp_processor_id();
+	int cpu = hrtimer_get_target(this_cpu, pinned);
 
 again:
 	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
 
 	if (base != new_base) {
 		/*
-		 * We are trying to schedule the timer on the local CPU.
+		 * We are trying to move timer to new_base.
 		 * However we can't change timer's base while it is running,
 		 * so we keep it on the same CPU. No hassle vs. reprogramming
 		 * the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
 
-		/* Optimized away for NOHZ=n SMP=n */
-		if (cpu == preferred_cpu) {
-			/* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
-			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
-							new_base->offset);
-#else
-			ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
-			/*
-			 * Get the next event on target cpu from the
-			 * clock events layer.
-			 * This covers the highres=off nohz=on case as well.
-			 */
-			ktime_t next = clockevents_get_next_event(cpu);
-
-			ktime_t delta = ktime_sub(expires, next);
-
-			/*
-			 * We do not migrate the timer when it is expiring
-			 * before the next event on the target cpu because
-			 * we cannot reprogram the target cpu hardware and
-			 * we would cause it to fire late.
-			 */
-			if (delta.tv64 < 0) {
-				cpu = smp_processor_id();
-				spin_unlock(&new_base->cpu_base->lock);
-				spin_lock(&base->cpu_base->lock);
-				timer->base = base;
-				goto again;
-			}
+		if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+			cpu = this_cpu;
+			spin_unlock(&new_base->cpu_base->lock);
+			spin_lock(&base->cpu_base->lock);
+			timer->base = base;
+			goto again;
 		}
 		timer->base = new_base;
 	}
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 	expires_next.tv64 = KTIME_MAX;
 
+	spin_lock(&cpu_base->lock);
+	/*
+	 * We set expires_next to KTIME_MAX here with cpu_base->lock
+	 * held to prevent that a timer is enqueued in our queue via
+	 * the migration code. This does not affect enqueueing of
+	 * timers which run their callback and need to be requeued on
+	 * this CPU.
+	 */
+	cpu_base->expires_next.tv64 = KTIME_MAX;
+
 	base = cpu_base->clock_base;
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		ktime_t basenow;
 		struct rb_node *node;
 
-		spin_lock(&cpu_base->lock);
-
 		basenow = ktime_add(now, base->offset);
 
 		while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
 			__run_hrtimer(timer);
 		}
-		spin_unlock(&cpu_base->lock);
 		base++;
 	}
 
+	/*
+	 * Store the new expiry value so the migration code can verify
+	 * against it.
+	 */
 	cpu_base->expires_next = expires_next;
+	spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
 	if (expires_next.tv64 != KTIME_MAX) {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d..16b5739c516 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos, *next;
-	int safety;
 
 	/* Ensure no-one is preepmted on the garbages */
-	mutex_unlock(&kprobe_insn_mutex);
-	safety = check_safety();
-	mutex_lock(&kprobe_insn_mutex);
-	if (safety != 0)
+	if (check_safety())
 		return -EAGAIN;
 
 	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/pid.c b/kernel/pid.c
index 5fa1db48d8b..31310b5d3f5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,7 +36,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
-#include <linux/kmemleak.h>
 
 #define pid_hashfn(nr, ns)	\
 	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -513,12 +512,6 @@ void __init pidhash_init(void)
 	pid_hash = alloc_bootmem(pidhash_size *	sizeof(*(pid_hash)));
 	if (!pid_hash)
 		panic("Could not alloc pidhash!\n");
-	/*
-	 * pid_hash contains references to allocated struct pid objects and it
-	 * must be scanned by kmemleak to avoid false positives.
-	 */
-	kmemleak_alloc(pid_hash, pidhash_size *	sizeof(*(pid_hash)), 0,
-		       GFP_KERNEL);
 	for (i = 0; i < pidhash_size; i++)
 		INIT_HLIST_HEAD(&pid_hash[i]);
 }
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ed97375daae..bf0014d6a5f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,7 +23,6 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
-#include <linux/smp_lock.h>
 #include <scsi/scsi_scan.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbba6d2..7717b95c202 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
 	int j;
 	struct rcu_node *rnp;
 
-	printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+	printk(KERN_INFO "Hierarchical RCU implementation.\n");
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
 		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
 	/* Register notifier for non-boot CPUs */
 	register_cpu_notifier(&rcu_nb);
-	printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
 }
 
 module_param(blimit, int, 0);
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e..01f55ada359 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
+static inline int should_resched(void)
+{
+	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
+}
+
 static void __cond_resched(void)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6565,7 @@ static void __cond_resched(void)
 
 int __sched _cond_resched(void)
 {
-	if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
-					system_state == SYSTEM_RUNNING) {
+	if (should_resched()) {
 		__cond_resched();
 		return 1;
 	}
@@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int cond_resched_lock(spinlock_t *lock)
 {
-	int resched = need_resched() && system_state == SYSTEM_RUNNING;
+	int resched = should_resched();
 	int ret = 0;
 
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
-		if (resched && need_resched())
+		if (resched)
 			__cond_resched();
 		else
 			cpu_relax();
@@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && system_state == SYSTEM_RUNNING) {
+	if (should_resched()) {
 		local_bh_enable();
 		__cond_resched();
 		local_bh_disable();
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ad6dd46111..a6dcd67b041 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
-
-ktime_t clockevents_get_next_event(int cpu)
-{
-	struct tick_device *td;
-	struct clock_event_device *dev;
-
-	td = &per_cpu(tick_cpu_device, cpu);
-	dev = td->evtdev;
-
-	return dev->next_event;
-}
 #endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e766..019f380fd76 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
 	  the timings of the initcalls and traces key events and the identity
 	  of tasks that can cause boot delays, such as context-switches.
 
-	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  Its aim is to be parsed by the scripts/bootgraph.pl tool to
 	  produce pretty graphics about boot inefficiencies, giving a visual
 	  representation of the delays during initcalls - but the raw
 	  /debug/tracing/trace text output is readable too.
 
-	  You must pass in ftrace=initcall to the kernel command line
-	  to enable this on bootup.
+	  You must pass in initcall_debug and ftrace=initcall to the kernel
+	  command line to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
 	bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc3..1090b0aed9b 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
+#include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/uaccess.h>
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf04df..4521c77d1a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
 	.stat_show	= function_stat_show
 };
 
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
 	struct ftrace_profile_stat *stat;
 	struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
 			 * The files created are permanent, if something happens
 			 * we still do not free memory.
 			 */
-			kfree(stat);
 			WARN(1,
 			     "Could not allocate stat file for cpu %d\n",
 			     cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
 }
 
 #else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 {
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
@@ -3160,10 +3159,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 	ret  = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
-	if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+	if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
 		goto out;
 
-	last_ftrace_enabled = ftrace_enabled;
+	last_ftrace_enabled = !!ftrace_enabled;
 
 	if (ftrace_enabled) {
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0dfdfa..8bc8d8afea6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134..6db005e1248 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
 		   ftrace_graph_ret_entry, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned long, ret.func, func)
+		TRACE_FIELD(unsigned long long, ret.calltime, calltime)
+		TRACE_FIELD(unsigned long long, ret.rettime, rettime)
+		TRACE_FIELD(unsigned long, ret.overrun, overrun)
 		TRACE_FIELD(int, ret.depth, depth)
 	),
 	TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e..e0c2545622e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
 
-	s->buffer[len] = 0;
-	seq_puts(m, s->buffer);
+	seq_write(m, s->buffer, len);
 
 	trace_seq_init(s);
 }
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71db..e644af91012 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 	ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
 	if (ret || !write ||
-	    (last_stack_tracer_enabled == stack_tracer_enabled))
+	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
 		goto out;
 
-	last_stack_tracer_enabled = stack_tracer_enabled;
+	last_stack_tracer_enabled = !!stack_tracer_enabled;
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);
author	Steve French <sfrench@us.ibm.com>	2009-07-16 04:21:39 +0000
committer	Steve French <sfrench@us.ibm.com>	2009-07-16 04:21:39 +0000
commit	f6c43385435640e056424034caac0d765c45e370 (patch)
tree	54c2ba0a7d3abb156bc51f6b9f02e00f0a260f85 /kernel
parent	65bc98b0059360e458aebd208587be44641227c1 (diff)
parent	35b5c55fee08e6e4001ba98060a2d0b82f70b5f4 (diff)