aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2009-07-16 04:21:39 +0000
committerSteve French <sfrench@us.ibm.com>2009-07-16 04:21:39 +0000
commitf6c43385435640e056424034caac0d765c45e370 (patch)
tree54c2ba0a7d3abb156bc51f6b9f02e00f0a260f85 /kernel
parent65bc98b0059360e458aebd208587be44641227c1 (diff)
parent35b5c55fee08e6e4001ba98060a2d0b82f70b5f4 (diff)
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'kernel')
-rw-r--r--kernel/futex.c1
-rw-r--r--kernel/hrtimer.c110
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/pid.c7
-rw-r--r--kernel/power/user.c1
-rw-r--r--kernel/rcutree.c3
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/time/clockevents.c11
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/blktrace.c1
-rw-r--r--kernel/trace/ftrace.c9
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace_event_types.h3
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/trace/trace_stack.c4
15 files changed, 91 insertions, 89 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 794c862125f..0672ff88f15 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -247,6 +247,7 @@ again:
if (err < 0)
return err;
+ page = compound_head(page);
lock_page(page);
if (!page->mapping) {
unlock_page(page);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958a96e..49da79ab848 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
}
}
+
+/*
+ * Get the preferred target CPU for NOHZ
+ */
+static int hrtimer_get_target(int this_cpu, int pinned)
+{
+#ifdef CONFIG_NO_HZ
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ return preferred_cpu;
+ }
+#endif
+ return this_cpu;
+}
+
+/*
+ * With HIGHRES=y we do not migrate the timer when it is expiring
+ * before the next event on the target cpu because we cannot reprogram
+ * the target cpu hardware and we would cause it to fire late.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+static int
+hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
+{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires;
+
+ if (!new_base->cpu_base->hres_active)
+ return 0;
+
+ expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+ return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
+#else
+ return 0;
+#endif
+}
+
/*
* Switch the timer base to the current CPU when possible.
*/
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
- int cpu, preferred_cpu = -1;
-
- cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- preferred_cpu = get_nohz_load_balancer();
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
-#endif
+ int this_cpu = smp_processor_id();
+ int cpu = hrtimer_get_target(this_cpu, pinned);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
if (base != new_base) {
/*
- * We are trying to schedule the timer on the local CPU.
+ * We are trying to move timer to new_base.
* However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
- /* Optimized away for NOHZ=n SMP=n */
- if (cpu == preferred_cpu) {
- /* Calculate clock monotonic expiry time */
-#ifdef CONFIG_HIGH_RES_TIMERS
- ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
- new_base->offset);
-#else
- ktime_t expires = hrtimer_get_expires(timer);
-#endif
-
- /*
- * Get the next event on target cpu from the
- * clock events layer.
- * This covers the highres=off nohz=on case as well.
- */
- ktime_t next = clockevents_get_next_event(cpu);
-
- ktime_t delta = ktime_sub(expires, next);
-
- /*
- * We do not migrate the timer when it is expiring
- * before the next event on the target cpu because
- * we cannot reprogram the target cpu hardware and
- * we would cause it to fire late.
- */
- if (delta.tv64 < 0) {
- cpu = smp_processor_id();
- spin_unlock(&new_base->cpu_base->lock);
- spin_lock(&base->cpu_base->lock);
- timer->base = base;
- goto again;
- }
+ if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
+ cpu = this_cpu;
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
}
timer->base = new_base;
}
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
expires_next.tv64 = KTIME_MAX;
+ spin_lock(&cpu_base->lock);
+ /*
+ * We set expires_next to KTIME_MAX here with cpu_base->lock
+ * held to prevent that a timer is enqueued in our queue via
+ * the migration code. This does not affect enqueueing of
+ * timers which run their callback and need to be requeued on
+ * this CPU.
+ */
+ cpu_base->expires_next.tv64 = KTIME_MAX;
+
base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow;
struct rb_node *node;
- spin_lock(&cpu_base->lock);
-
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
__run_hrtimer(timer);
}
- spin_unlock(&cpu_base->lock);
base++;
}
+ /*
+ * Store the new expiry value so the migration code can verify
+ * against it.
+ */
cpu_base->expires_next = expires_next;
+ spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d..16b5739c516 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos, *next;
- int safety;
/* Ensure no-one is preepmted on the garbages */
- mutex_unlock(&kprobe_insn_mutex);
- safety = check_safety();
- mutex_lock(&kprobe_insn_mutex);
- if (safety != 0)
+ if (check_safety())
return -EAGAIN;
hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/pid.c b/kernel/pid.c
index 5fa1db48d8b..31310b5d3f5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,7 +36,6 @@
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
-#include <linux/kmemleak.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -513,12 +512,6 @@ void __init pidhash_init(void)
pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
if (!pid_hash)
panic("Could not alloc pidhash!\n");
- /*
- * pid_hash contains references to allocated struct pid objects and it
- * must be scanned by kmemleak to avoid false positives.
- */
- kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
- GFP_KERNEL);
for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ed97375daae..bf0014d6a5f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,7 +23,6 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
-#include <linux/smp_lock.h>
#include <scsi/scsi_scan.h>
#include <asm/uaccess.h>
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbba6d2..7717b95c202 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
int j;
struct rcu_node *rnp;
- printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+ printk(KERN_INFO "Hierarchical RCU implementation.\n");
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
- printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
}
module_param(blimit, int, 0);
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e..01f55ada359 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield)
return 0;
}
+static inline int should_resched(void)
+{
+ return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
+}
+
static void __cond_resched(void)
{
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6565,7 @@ static void __cond_resched(void)
int __sched _cond_resched(void)
{
- if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
- system_state == SYSTEM_RUNNING) {
+ if (should_resched()) {
__cond_resched();
return 1;
}
@@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched);
*/
int cond_resched_lock(spinlock_t *lock)
{
- int resched = need_resched() && system_state == SYSTEM_RUNNING;
+ int resched = should_resched();
int ret = 0;
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- if (resched && need_resched())
+ if (resched)
__cond_resched();
else
cpu_relax();
@@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
- if (need_resched() && system_state == SYSTEM_RUNNING) {
+ if (should_resched()) {
local_bh_enable();
__cond_resched();
local_bh_disable();
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ad6dd46111..a6dcd67b041 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
-
-ktime_t clockevents_get_next_event(int cpu)
-{
- struct tick_device *td;
- struct clock_event_device *dev;
-
- td = &per_cpu(tick_cpu_device, cpu);
- dev = td->evtdev;
-
- return dev->next_event;
-}
#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e766..019f380fd76 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
the timings of the initcalls and traces key events and the identity
of tasks that can cause boot delays, such as context-switches.
- Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+ Its aim is to be parsed by the scripts/bootgraph.pl tool to
produce pretty graphics about boot inefficiencies, giving a visual
representation of the delays during initcalls - but the raw
/debug/tracing/trace text output is readable too.
- You must pass in ftrace=initcall to the kernel command line
- to enable this on bootup.
+ You must pass in initcall_debug and ftrace=initcall to the kernel
+ command line to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc3..1090b0aed9b 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
+#include <linux/smp_lock.h>
#include <linux/time.h>
#include <linux/uaccess.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf04df..4521c77d1a1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
.stat_show = function_stat_show
};
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
* The files created are permanent, if something happens
* we still do not free memory.
*/
- kfree(stat);
WARN(1,
"Could not allocate stat file for cpu %d\n",
cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
}
#else /* CONFIG_FUNCTION_PROFILER */
-static void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
@@ -3160,10 +3159,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
- if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+ if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
- last_ftrace_enabled = ftrace_enabled;
+ last_ftrace_enabled = !!ftrace_enabled;
if (ftrace_enabled) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0dfdfa..8bc8d8afea6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134..6db005e1248 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
ftrace_graph_ret_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, ret.func, func)
+ TRACE_FIELD(unsigned long long, ret.calltime, calltime)
+ TRACE_FIELD(unsigned long long, ret.rettime, rettime)
+ TRACE_FIELD(unsigned long, ret.overrun, overrun)
TRACE_FIELD(int, ret.depth, depth)
),
TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e..e0c2545622e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
- s->buffer[len] = 0;
- seq_puts(m, s->buffer);
+ seq_write(m, s->buffer, len);
trace_seq_init(s);
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71db..e644af91012 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
if (ret || !write ||
- (last_stack_tracer_enabled == stack_tracer_enabled))
+ (last_stack_tracer_enabled == !!stack_tracer_enabled))
goto out;
- last_stack_tracer_enabled = stack_tracer_enabled;
+ last_stack_tracer_enabled = !!stack_tracer_enabled;
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);