aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-02-13 09:34:07 +0100
committerIngo Molnar <mingo@elte.hu>2009-02-13 09:34:07 +0100
commite9c4ffb11f0b19005b5b9dc8481687a3637e5887 (patch)
tree7007f2ff846b9b057c5cd7c25e8b82e49f9b4b63 /kernel
parent4bcf349a0f90d1e69eb35c6df0fa285c886c1cd6 (diff)
parent071a0bc2ceace31266836801510879407a3701fa (diff)
Merge branch 'linus' into perfcounters/core
Conflicts: arch/x86/kernel/acpi/boot.c
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c3
-rw-r--r--kernel/exit.c3
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/itimer.c4
-rw-r--r--kernel/posix-cpu-timers.c117
-rw-r--r--kernel/profile.c3
-rw-r--r--kernel/sched.c27
-rw-r--r--kernel/sched_fair.c11
-rw-r--r--kernel/sched_stats.h45
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sysctl.c5
11 files changed, 178 insertions, 53 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5a54ff42874..e14db9c089b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->root == root)
- mutex_lock_nested(&ss->hierarchy_mutex, i);
+ mutex_lock(&ss->hierarchy_mutex);
}
}
@@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
BUG_ON(!list_empty(&init_task.tasks));
mutex_init(&ss->hierarchy_mutex);
+ lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
ss->active = 1;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index a1b18c03b4c..f52c24eb8a8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
+ sig->utime = cputime_add(sig->utime, task_utime(tsk));
+ sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
task_io_accounting_add(&sig->ioac, &tsk->ioac);
+ sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
diff --git a/kernel/fork.c b/kernel/fork.c
index b01c5b04bcf..4640a3e0085 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -856,13 +856,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->tty_old_pgrp = NULL;
sig->tty = NULL;
- sig->cutime = sig->cstime = cputime_zero;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->gtime = cputime_zero;
sig->cgtime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
task_io_accounting_init(&sig->ioac);
+ sig->sum_sched_runtime = 0;
taskstats_tgid_init(sig);
task_lock(current->group_leader);
@@ -1100,7 +1101,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
- if (unlikely(ptrace_reparented(current)))
+ if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags);
/* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 6a5fe93dd8b..58762f7077e 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime cputime;
cputime_t utime;
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
if (cputime_le(cval, utime)) { /* about to fire */
cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
struct task_cputime times;
cputime_t ptime;
- thread_group_cputime(tsk, &times);
+ thread_group_cputimer(tsk, &times);
ptime = cputime_add(times.utime, times.stime);
if (cputime_le(cval, ptime)) { /* about to fire */
cval = jiffies_to_cputime(1);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index fa07da94d7b..2313a4cc14e 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,6 +230,71 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
return 0;
}
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct sighand_struct *sighand;
+ struct signal_struct *sig;
+ struct task_struct *t;
+
+ *times = INIT_CPUTIME;
+
+ rcu_read_lock();
+ sighand = rcu_dereference(tsk->sighand);
+ if (!sighand)
+ goto out;
+
+ sig = tsk->signal;
+
+ t = tsk;
+ do {
+ times->utime = cputime_add(times->utime, t->utime);
+ times->stime = cputime_add(times->stime, t->stime);
+ times->sum_exec_runtime += t->se.sum_exec_runtime;
+
+ t = next_thread(t);
+ } while (t != tsk);
+
+ times->utime = cputime_add(times->utime, sig->utime);
+ times->stime = cputime_add(times->stime, sig->stime);
+ times->sum_exec_runtime += sig->sum_sched_runtime;
+out:
+ rcu_read_unlock();
+}
+
+static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
+{
+ if (cputime_gt(b->utime, a->utime))
+ a->utime = b->utime;
+
+ if (cputime_gt(b->stime, a->stime))
+ a->stime = b->stime;
+
+ if (b->sum_exec_runtime > a->sum_exec_runtime)
+ a->sum_exec_runtime = b->sum_exec_runtime;
+}
+
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+{
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct task_cputime sum;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cputimer->lock, flags);
+ if (!cputimer->running) {
+ cputimer->running = 1;
+ /*
+ * The POSIX timer interface allows for absolute time expiry
+ * values through the TIMER_ABSTIME flag, therefore we have
+ * to synchronize the timer to the clock every time we start
+ * it.
+ */
+ thread_group_cputime(tsk, &sum);
+ update_gt_cputime(&cputimer->cputime, &sum);
+ }
+ *times = cputimer->cputime;
+ spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
/*
* Sample a process (thread group) clock for the given group_leader task.
* Must be called with tasklist_lock held for reading.
@@ -457,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
struct task_cputime cputime;
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
cleanup_timers(tsk->signal->cpu_timers,
cputime.utime, cputime.stime, cputime.sum_exec_runtime);
}
@@ -964,6 +1029,19 @@ static void check_thread_timers(struct task_struct *tsk,
}
}
+static void stop_process_timers(struct task_struct *tsk)
+{
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ unsigned long flags;
+
+ if (!cputimer->running)
+ return;
+
+ spin_lock_irqsave(&cputimer->lock, flags);
+ cputimer->running = 0;
+ spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them
* off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -987,13 +1065,15 @@ static void check_process_timers(struct task_struct *tsk,
sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it_virt_expires, cputime_zero) &&
- list_empty(&timers[CPUCLOCK_SCHED]))
+ list_empty(&timers[CPUCLOCK_SCHED])) {
+ stop_process_timers(tsk);
return;
+ }
/*
* Collect the current process totals.
*/
- thread_group_cputime(tsk, &cputime);
+ thread_group_cputimer(tsk, &cputime);
utime = cputime.utime;
ptime = cputime_add(utime, cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1259,7 +1339,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (!task_cputime_zero(&sig->cputime_expires)) {
struct task_cputime group_sample;
- thread_group_cputime(tsk, &group_sample);
+ thread_group_cputimer(tsk, &group_sample);
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
@@ -1329,6 +1409,33 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+ struct task_struct *p,
+ union cpu_time_count *cpu)
+{
+ struct task_cputime cputime;
+
+ thread_group_cputimer(p, &cputime);
+ switch (CPUCLOCK_WHICH(which_clock)) {
+ default:
+ return -EINVAL;
+ case CPUCLOCK_PROF:
+ cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+ break;
+ case CPUCLOCK_VIRT:
+ cpu->cpu = cputime.utime;
+ break;
+ case CPUCLOCK_SCHED:
+ cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ break;
+ }
+ return 0;
+}
+
+/*
* Set one of the process-wide special case CPU timers.
* The tsk->sighand->siglock must be held by the caller.
* The *newval argument is relative and we update it to be absolute, *oldval
@@ -1341,7 +1448,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
struct list_head *head;
BUG_ON(clock_idx == CPUCLOCK_SCHED);
- cpu_clock_sample_group(clock_idx, tsk, &now);
+ cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) {
if (!cputime_eq(*oldval, cputime_zero)) {
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933acf5b..7724e0409ba 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
if (!slab_is_available()) {
prof_buffer = alloc_bootmem(buffer_bytes);
alloc_bootmem_cpumask_var(&prof_cpu_mask);
+ cpumask_copy(prof_cpu_mask, cpu_possible_mask);
return 0;
}
if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;
+ cpumask_copy(prof_cpu_mask, cpu_possible_mask);
+
prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
if (prof_buffer)
return 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 0952931ca7f..83b68ff6df8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2311,16 +2311,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;
- if (!sync) {
- if (current->se.avg_overlap < sysctl_sched_migration_cost &&
- p->se.avg_overlap < sysctl_sched_migration_cost)
- sync = 1;
- } else {
- if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
- p->se.avg_overlap >= sysctl_sched_migration_cost)
- sync = 0;
- }
-
#ifdef CONFIG_SMP
if (sched_feat(LB_WAKEUP_UPDATE)) {
struct sched_domain *sd;
@@ -3952,19 +3942,24 @@ int select_nohz_load_balancer(int stop_tick)
int cpu = smp_processor_id();
if (stop_tick) {
- cpumask_set_cpu(cpu, nohz.cpu_mask);
cpu_rq(cpu)->in_nohz_recently = 1;
- /*
- * If we are going offline and still the leader, give up!
- */
- if (!cpu_active(cpu) &&
- atomic_read(&nohz.load_balancer) == cpu) {
+ if (!cpu_active(cpu)) {
+ if (atomic_read(&nohz.load_balancer) != cpu)
+ return 0;
+
+ /*
+ * If we are going offline and still the leader,
+ * give up!
+ */
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
+
return 0;
}
+ cpumask_set_cpu(cpu, nohz.cpu_mask);
+
/* time for ilb owner also to sleep */
if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
if (atomic_read(&nohz.load_balancer) == cpu)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a7e50ba185a..0566f2a03c4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1191,15 +1191,20 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
int idx, unsigned long load, unsigned long this_load,
unsigned int imbalance)
{
+ struct task_struct *curr = this_rq->curr;
+ struct task_group *tg;
unsigned long tl = this_load;
unsigned long tl_per_task;
- struct task_group *tg;
unsigned long weight;
int balanced;
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
return 0;
+ if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+ p->se.avg_overlap > sysctl_sched_migration_cost))
+ sync = 0;
+
/*
* If sync wakeup then subtract the (maximum possible)
* effect of the currently running task from the load
@@ -1426,7 +1431,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
if (!sched_feat(WAKEUP_PREEMPT))
return;
- if (sched_feat(WAKEUP_OVERLAP) && sync) {
+ if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+ (se->avg_overlap < sysctl_sched_migration_cost &&
+ pse->avg_overlap < sysctl_sched_migration_cost))) {
resched_task(curr);
return;
}
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8ab0cef8eca..a8f93dd374e 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
static inline void account_group_user_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct task_cputime *times;
- struct signal_struct *sig;
+ struct thread_group_cputimer *cputimer;
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
- sig = tsk->signal;
- times = &sig->cputime.totals;
+ cputimer = &tsk->signal->cputimer;
- spin_lock(&times->lock);
- times->utime = cputime_add(times->utime, cputime);
- spin_unlock(&times->lock);
+ if (!cputimer->running)
+ return;
+
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.utime =
+ cputime_add(cputimer->cputime.utime, cputime);
+ spin_unlock(&cputimer->lock);
}
/**
@@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
static inline void account_group_system_time(struct task_struct *tsk,
cputime_t cputime)
{
- struct task_cputime *times;
- struct signal_struct *sig;
+ struct thread_group_cputimer *cputimer;
/* tsk == current, ensure it is safe to use ->signal */
if (unlikely(tsk->exit_state))
return;
- sig = tsk->signal;
- times = &sig->cputime.totals;
+ cputimer = &tsk->signal->cputimer;
+
+ if (!cputimer->running)
+ return;
- spin_lock(&times->lock);
- times->stime = cputime_add(times->stime, cputime);
- spin_unlock(&times->lock);
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.stime =
+ cputime_add(cputimer->cputime.stime, cputime);
+ spin_unlock(&cputimer->lock);
}
/**
@@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
static inline void account_group_exec_runtime(struct task_struct *tsk,
unsigned long long ns)
{
- struct task_cputime *times;
+ struct thread_group_cputimer *cputimer;
struct signal_struct *sig;
sig = tsk->signal;
@@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
if (unlikely(!sig))
return;
- times = &sig->cputime.totals;
+ cputimer = &sig->cputimer;
+
+ if (!cputimer->running)
+ return;
- spin_lock(&times->lock);
- times->sum_exec_runtime += ns;
- spin_unlock(&times->lock);
+ spin_lock(&cputimer->lock);
+ cputimer->cputime.sum_exec_runtime += ns;
+ spin_unlock(&cputimer->lock);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index b6b36768b75..2a74fe87c0d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1367,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
struct siginfo info;
unsigned long flags;
struct sighand_struct *psig;
- struct task_cputime cputime;
int ret = sig;
BUG_ON(sig == -1);
@@ -1397,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
info.si_uid = __task_cred(tsk)->uid;
rcu_read_unlock();
- thread_group_cputime(tsk, &cputime);
- info.si_utime = cputime_to_jiffies(cputime.utime);
- info.si_stime = cputime_to_jiffies(cputime.stime);
+ info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
+ tsk->signal->utime));
+ info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
+ tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 790f9d78566..c5ef44ff850 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int two = 2;
static int zero;
static int one = 1;
+static unsigned long one_ul = 1;
static int one_hundred = 100;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_background_bytes_handler,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &one_ul,
},
{
.ctl_name = VM_DIRTY_RATIO,
@@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_bytes_handler,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &one_ul,
},
{
.procname = "dirty_writeback_centisecs",