From eba1ed4b7e52720e3099325874811c38a5ec1562 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:02 +0200
Subject: sched: debug: track maximum 'slice'

track the maximum amount of time a task has executed while the CPU
load was at least 2x (i.e. at least two nice-0 tasks were runnable).

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 833f7dc2b8d..9761b165d56 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -921,6 +921,7 @@ struct sched_entity {
 	u64 block_start;
 	u64 block_max;
 	u64 exec_max;
+	u64 slice_max;
 
 	unsigned long wait_runtime_overruns;
 	unsigned long wait_runtime_underruns;
--
cgit v1.2.3

From 2bd8e6d422a4f44c0994f909317eba80b0fe08a1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:02 +0200
Subject: sched: use constants if !CONFIG_SCHED_DEBUG

use constants if !CONFIG_SCHED_DEBUG. this speeds up the code and
reduces code-size:

    text    data     bss     dec     hex filename
   27464    3014      16   30494    771e sched.o.before
   26929    3010      20   29959    7507 sched.o.after

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9761b165d56..befca3f9364 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1402,15 +1402,18 @@ static inline void idle_task_exit(void) {}
 
 extern void sched_idle_next(void);
 
+#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_stat_granularity;
 extern unsigned int sysctl_sched_runtime_limit;
-extern unsigned int sysctl_sched_compat_yield;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
+#endif
+
+extern unsigned int sysctl_sched_compat_yield;
 
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
--
cgit v1.2.3

From 8ebc91d93669af39dbed50914d7daf457eeb43be Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:03 +0200
Subject: sched: remove stat_gran

remove the stat_gran code - it was disabled by default and it caused
unnecessary overhead.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index befca3f9364..3c38a5040e8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -895,9 +895,6 @@ struct load_weight {
  */
 struct sched_entity {
 	long wait_runtime;
-	unsigned long delta_fair_run;
-	unsigned long delta_fair_sleep;
-	unsigned long delta_exec;
 	s64 fair_key;
 	struct load_weight load;	/* for load-balancing */
 	struct rb_node run_node;
--
cgit v1.2.3
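
[ Editor's note: the "use constants if !CONFIG_SCHED_DEBUG" patch
  above relies on a pattern along these lines -- a sketch assuming
  the const_debug annotation style used in kernel/sched.c of this
  era, with an illustrative value; not verbatim kernel code: ]

/*
 * Sketch: with SCHED_DEBUG the knob stays a writable variable that
 * sysctl can export; without it the same symbol becomes a
 * compile-time constant the optimizer can fold, which is where the
 * text-size reduction above comes from.
 */
#ifdef CONFIG_SCHED_DEBUG
# define const_debug __read_mostly	/* runtime-tunable */
#else
# define const_debug static const	/* folded at build time */
#endif

const_debug unsigned int sysctl_sched_features = 1;
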
From e9acbff6484df51fd880e0f5fe0224e8be34c17b Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:04 +0200
Subject: sched: introduce se->vruntime

introduce se->vruntime as a sum of weighted delta-exec's, and use
that as the key into the tree.

the idea to use absolute virtual time as the basic metric of
scheduling was first raised by William Lee Irwin, advanced by
Tong Li and first prototyped by Roman Zippel in the "Really Fair
Scheduler" (RFS) patchset.

also see: http://lkml.org/lkml/2007/9/2/76 for a simpler variant of
this patch.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c38a5040e8..5e5c457fba8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -902,6 +902,7 @@ struct sched_entity {
 
 	u64 exec_start;
 	u64 sum_exec_runtime;
+	u64 vruntime;
 	u64 prev_sum_exec_runtime;
 	u64 wait_start_fair;
 	u64 sleep_start_fair;
--
cgit v1.2.3

From e22f5bbf86d8cce710d5c8ba5bf57832e73aab8c Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:06 +0200
Subject: sched: remove wait_runtime limit

remove the wait_runtime-limit fields and the code depending on them,
now that the math has been changed over to rely on the vruntime
metric.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e5c457fba8..353630d6ae4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -905,7 +905,6 @@ struct sched_entity {
 	u64 vruntime;
 	u64 prev_sum_exec_runtime;
 	u64 wait_start_fair;
-	u64 sleep_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
--
cgit v1.2.3

From bbdba7c0e1161934ae881ad00e4db49830f5ef59 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:06 +0200
Subject: sched: remove wait_runtime fields and features

remove wait_runtime based fields and features, now that the CFS
math has been changed over to the vruntime metric.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Signed-off-by: Mike Galbraith
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 353630d6ae4..572df1bbaee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -888,13 +888,9 @@ struct load_weight {
  *     4 se->block_start
  *     4 se->run_node
  *     4 se->sleep_start
- *     4 se->sleep_start_fair
  *     6 se->load.weight
- *     7 se->delta_fair
- *    15 se->wait_runtime
  */
 struct sched_entity {
-	long wait_runtime;
 	s64 fair_key;
 	struct load_weight load;	/* for load-balancing */
 	struct rb_node run_node;
@@ -904,12 +900,10 @@ struct sched_entity {
 	u64 sum_exec_runtime;
 	u64 vruntime;
 	u64 prev_sum_exec_runtime;
-	u64 wait_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
 	u64 wait_max;
-	s64 sum_wait_runtime;
 
 	u64 sleep_start;
 	u64 sleep_max;
@@ -919,9 +913,6 @@ struct sched_entity {
 	u64 block_max;
 	u64 exec_max;
 	u64 slice_max;
-
-	unsigned long wait_runtime_overruns;
-	unsigned long wait_runtime_underruns;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
--
cgit v1.2.3
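
[ Editor's note: a minimal user-space sketch of the vruntime
  arithmetic described above -- illustrative only, not the kernel's
  implementation; the NICE_0_LOAD value and the struct layout are
  assumptions: ]

#include <stdint.h>

#define NICE_0_LOAD 1024ULL	/* assumed weight of a nice-0 task */

struct entity {
	uint64_t vruntime;	/* sum of weighted delta-exec's */
	unsigned long weight;	/* load weight, e.g. 1024 for nice 0 */
};

/* charge delta_exec nanoseconds of CPU time to 'e': nice-0 tasks
 * accrue wall-clock time, heavier (higher-weight) tasks accrue
 * proportionally less, so sorting entities on vruntime and always
 * running the leftmost one yields weighted fairness */
static void update_vruntime(struct entity *e, uint64_t delta_exec)
{
	e->vruntime += delta_exec * NICE_0_LOAD / e->weight;
}
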
From 30cfdcfc5f180fc21a3dad6ae3b7b2a9ee112186 Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko
Date: Mon, 15 Oct 2007 17:00:07 +0200
Subject: sched: do not keep current in the tree and get rid of
 sched_entity::fair_key

Get rid of 'sched_entity::fair_key'.

As a side effect, 'current' is not kept within the tree for
SCHED_NORMAL/BATCH tasks anymore. This simplifies some parts of the
code (e.g. entity_tick() and yield_task_fair()) and also somewhat
optimizes them (e.g. a single update_curr() now vs. dequeue/enqueue()
before in entity_tick()).

Signed-off-by: Dmitry Adamushko
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 572df1bbaee..f776a30b403 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -891,7 +891,6 @@ struct load_weight {
  *     6 se->load.weight
  */
 struct sched_entity {
-	s64 fair_key;
 	struct load_weight load;	/* for load-balancing */
 	struct rb_node run_node;
 	unsigned int on_rq;
--
cgit v1.2.3

From 4530d7ab0fb8d5056b68c376949e2d5c4db7817e Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko
Date: Mon, 15 Oct 2007 17:00:08 +0200
Subject: sched: simplify sched_class::yield_task()

the 'p' (task_struct) parameter in sched_class::yield_task() is
redundant, as the caller is always 'current'. Get rid of it.

   text    data     bss     dec     hex filename
  24341    2734      20   27095    69d7 sched.o.before
  24330    2734      20   27084    69cc sched.o.after

Signed-off-by: Dmitry Adamushko
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f776a30b403..66169005f00 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -858,7 +858,7 @@ struct sched_class {
 
 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
-	void (*yield_task) (struct rq *rq, struct task_struct *p);
+	void (*yield_task) (struct rq *rq);
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
--
cgit v1.2.3

From f6b53205e17c8ca481c69ed579a35a650a4b481a Mon Sep 17 00:00:00 2001
From: Dmitry Adamushko
Date: Mon, 15 Oct 2007 17:00:08 +0200
Subject: sched: rework enqueue/dequeue_entity() to get rid of
 set_curr_task()

rework enqueue/dequeue_entity() to get rid of
sched_class::set_curr_task(). This simplifies sched_setscheduler(),
rt_mutex_setprio() and sched_move_tasks().

   text    data     bss     dec     hex filename
  24330    2734      20   27084    69cc sched.o.before
  24233    2730      20   26983    6967 sched.o.after

Signed-off-by: Dmitry Adamushko
Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 66169005f00..abcb02738d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -871,7 +871,6 @@ struct sched_class {
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *all_pinned, int *this_best_prio);
 
-	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
 };
--
cgit v1.2.3
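
[ Editor's note: the revert that follows restores
  sched_class::set_curr_task(). A simplified sketch of the pattern
  its commit message names -- locking elided, rq_is_current() is an
  assumed helper, illustrative only: ]

/* re-queue a (possibly running) task 'p' around a policy/group change */
static void switch_sched_class(struct rq *rq, struct task_struct *p,
			       struct sched_class *new_class)
{
	int on_rq = p->se.on_rq;		/* queued? */
	int running = rq_is_current(rq, p);	/* running right now? */

	if (on_rq)
		p->sched_class->dequeue_task(rq, p, 0);
	if (running)
		p->sched_class->put_prev_task(rq, p);

	p->sched_class = new_class;

	if (running)
		new_class->set_curr_task(rq);	/* reattach as current */
	if (on_rq)
		new_class->enqueue_task(rq, p, 0);
}
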
From 83b699ed20f5218580a1b7042064082e2e05f8c5 Mon Sep 17 00:00:00 2001
From: Srivatsa Vaddagiri
Date: Mon, 15 Oct 2007 17:00:08 +0200
Subject: sched: revert recent removal of set_curr_task()

Revert removal of set_curr_task. Use put_prev_task/set_curr_task
when changing groups/policies.

Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Dhaval Giani
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index abcb02738d9..66169005f00 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -871,6 +871,7 @@ struct sched_class {
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *all_pinned, int *this_best_prio);
 
+	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
 };
--
cgit v1.2.3

From 9b5b77512dce239fa168183fa71896712232e95a Mon Sep 17 00:00:00 2001
From: Srivatsa Vaddagiri
Date: Mon, 15 Oct 2007 17:00:09 +0200
Subject: sched: clean up code under CONFIG_FAIR_GROUP_SCHED

With the view of supporting user-id based fair scheduling (and not
just container-based fair scheduling), this patch renames several
functions and makes them independent of whether they are being used
for container or user-id based fair scheduling.

Also fix a problem reported by KAMEZAWA Hiroyuki (wrt allocating
undersized arrays for tg->cfs_rq[] and tg->se[]).

Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Dhaval Giani
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 66169005f00..03c13b663e4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
+struct task_grp;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
 
 extern void normalize_rt_tasks(void);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+extern struct task_grp init_task_grp;
+
+extern struct task_grp *sched_create_group(void);
+extern void sched_destroy_group(struct task_grp *tg);
+extern void sched_move_task(struct task_struct *tsk);
+extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
--
cgit v1.2.3
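
[ Editor's note: a sketch of how the group-scheduling API exported
  above might be used by in-kernel code -- the ERR_PTR-on-failure
  semantics of sched_create_group() are an assumption here, and the
  step that attaches tasks to the group is elided; illustrative only: ]

#ifdef CONFIG_FAIR_GROUP_SCHED
static struct task_grp *create_half_weight_group(void)
{
	struct task_grp *tg = sched_create_group();

	if (IS_ERR(tg))
		return tg;
	/*
	 * shares are relative weights: 512 gives this group half the
	 * CPU weight of a default (1024-share) group under contention
	 */
	sched_group_set_shares(tg, 512);
	return tg;
}
#endif
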
From 24e377a83220ef05c9b5bec7e01d65eed6609aa6 Mon Sep 17 00:00:00 2001
From: Srivatsa Vaddagiri
Date: Mon, 15 Oct 2007 17:00:09 +0200
Subject: sched: add fair-user scheduler

Enable user-id based fair group scheduling. This is useful for
anyone who wants to test the group scheduler w/o having to enable
CONFIG_CGROUPS.

A separate scheduling group (i.e. struct task_grp) is automatically
created for every new user added to the system. Upon uid change for
a task, it is made to move to the corresponding scheduling group.

A /proc tunable (/proc/root_user_share) is also provided to tune the
root user's quota of cpu bandwidth.

Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Dhaval Giani
Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03c13b663e4..d0cc58311b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -597,6 +597,10 @@ struct user_struct {
 	/* Hash table maintenance information */
 	struct hlist_node uidhash_node;
 	uid_t uid;
+
+#ifdef CONFIG_FAIR_USER_SCHED
+	struct task_grp *tg;
+#endif
 };
 
 extern struct user_struct *find_user(uid_t);
--
cgit v1.2.3

From b8efb56172bc55082b8490778b07ef73eea0b551 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:10 +0200
Subject: sched debug: BKL usage statistics

add per task and per rq BKL usage statistics.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0cc58311b1..920eb7354d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -621,6 +621,10 @@ struct sched_info {
 	/* timestamps */
 	unsigned long long last_arrival,/* when we last ran on a cpu */
 			   last_queued;	/* when we were last queued to run */
+#ifdef CONFIG_SCHEDSTATS
+	/* BKL stats */
+	unsigned long bkl_cnt;
+#endif
 };
 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
--
cgit v1.2.3

From c18b8a7cbcbac46497ee1ce656b0e68197c7581d Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:10 +0200
Subject: sched: remove unneeded tunables

remove unneeded tunables.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 920eb7354d0..2c33227b0f8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,8 +1403,6 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
-extern unsigned int sysctl_sched_stat_granularity;
-extern unsigned int sysctl_sched_runtime_limit;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
--
cgit v1.2.3
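
[ Editor's note: the fair-user scheduler above hangs the group off
  struct user_struct; a sketch of the resulting lookup, inferred from
  the fields these patches add -- the helper name is hypothetical: ]

#ifdef CONFIG_FAIR_USER_SCHED
/* a task's scheduling group follows its uid: the group pointer lives
 * in the per-user structure, so an uid change moves the task to the
 * new user's group */
static inline struct task_grp *task_grp_of(struct task_struct *p)
{
	return p->user->tg;
}
#endif
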
From 67e9fb2a39a1d454218d50383094940982be138f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 15 Oct 2007 17:00:10 +0200
Subject: sched: add vslice

add vslice: the load-dependent "virtual slice" a task should run
ideally, so that the observed latency stays within the sched_latency
window.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c33227b0f8..d74830cc51e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -908,6 +908,7 @@ struct sched_entity {
 	u64 sum_exec_runtime;
 	u64 vruntime;
 	u64 prev_sum_exec_runtime;
+	u64 last_min_vruntime;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
--
cgit v1.2.3

From 94359f05cb7e1fed0deccc83ebc30a1175a9ae16 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:11 +0200
Subject: sched: undo some of the recent changes

undo some of the recent changes that are not needed after all,
such as last_min_vruntime.

Signed-off-by: Ingo Molnar
Signed-off-by: Peter Zijlstra
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d74830cc51e..2c33227b0f8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -908,7 +908,6 @@ struct sched_entity {
 	u64 sum_exec_runtime;
 	u64 vruntime;
 	u64 prev_sum_exec_runtime;
-	u64 last_min_vruntime;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
--
cgit v1.2.3

From 2d72376b3af1e7d4d4515ebfd0f4383f2e92c343 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:12 +0200
Subject: sched: clean up schedstats, cnt -> count

rename all 'cnt' fields and variables to the less yucky 'count'
name. yuckage noticed by Andrew Morton.

no change in code, other than the /proc/sched_debug bkl_count string
got a bit larger:

   text    data     bss     dec     hex filename
  38236    3506      24   41766    a326 sched.o.before
  38240    3506      24   41770    a32a sched.o.after

Signed-off-by: Ingo Molnar
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c33227b0f8..d5daca4bcc6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -614,7 +614,7 @@ struct reclaim_state;
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 struct sched_info {
 	/* cumulative counters */
-	unsigned long pcnt;	/* # of times run on this cpu */
+	unsigned long pcount;	/* # of times run on this cpu */
 	unsigned long long cpu_time,	/* time spent on the cpu */
 			   run_delay;	/* time spent waiting on a runqueue */
 
@@ -623,7 +623,7 @@ struct sched_info {
 			   last_queued;	/* when we were last queued to run */
 #ifdef CONFIG_SCHEDSTATS
 	/* BKL stats */
-	unsigned long bkl_cnt;
+	unsigned long bkl_count;
 #endif
 };
 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
@@ -759,7 +759,7 @@ struct sched_domain {
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
-	unsigned long lb_cnt[CPU_MAX_IDLE_TYPES];
+	unsigned long lb_count[CPU_MAX_IDLE_TYPES];
 	unsigned long lb_failed[CPU_MAX_IDLE_TYPES];
 	unsigned long lb_balanced[CPU_MAX_IDLE_TYPES];
 	unsigned long lb_imbalance[CPU_MAX_IDLE_TYPES];
@@ -769,17 +769,17 @@ struct sched_domain {
 	unsigned long lb_nobusyq[CPU_MAX_IDLE_TYPES];
 
 	/* Active load balancing */
-	unsigned long alb_cnt;
+	unsigned long alb_count;
 	unsigned long alb_failed;
 	unsigned long alb_pushed;
 
 	/* SD_BALANCE_EXEC stats */
-	unsigned long sbe_cnt;
+	unsigned long sbe_count;
 	unsigned long sbe_balanced;
 	unsigned long sbe_pushed;
 
 	/* SD_BALANCE_FORK stats */
-	unsigned long sbf_cnt;
+	unsigned long sbf_count;
 	unsigned long sbf_balanced;
 	unsigned long sbf_pushed;
--
cgit v1.2.3
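
[ Editor's note: the "virtual slice" from the vslice patch above, as
  self-contained arithmetic -- the 20ms latency target is an assumed
  example value; this illustrates the relation, it is not the kernel
  code: ]

#include <stdint.h>

#define SCHED_LATENCY_NS 20000000ULL	/* assumed sched_latency: 20 ms */
#define NICE_0_LOAD 1024ULL

/* wall-clock slice for an entity of weight w on a runqueue of total
 * weight W: every entity runs once per latency window, for a share
 * proportional to its weight */
static uint64_t slice(uint64_t w, uint64_t W)
{
	return SCHED_LATENCY_NS * w / W;
}

/* the same slice expressed in virtual time: the entity weight
 * cancels out, so vslice == SCHED_LATENCY_NS * NICE_0_LOAD / W is
 * identical for all entities on the queue */
static uint64_t vslice(uint64_t w, uint64_t W)
{
	return slice(w, W) * NICE_0_LOAD / w;
}
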
From 5f6d858ecca78f71755859a346d845e302973cd1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 15 Oct 2007 17:00:12 +0200
Subject: sched: speed up and simplify vslice calculations

speed up and simplify vslice calculations.

[ From: Mike Galbraith: build fix ]

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d5daca4bcc6..97f736b749c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,7 +1400,7 @@ extern void sched_idle_next(void);
 
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_nr_latency;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
--
cgit v1.2.3

From 5522d5d5f70005faeffff3ffc0cfa8eec0155de4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:12 +0200
Subject: sched: mark scheduling classes as const

mark scheduling classes as const. This speeds up the code a bit and
shrinks it:

   text    data     bss     dec     hex filename
  40027    4018     292   44337    ad31 sched.o.before
  40190    3842     292   44324    ad24 sched.o.after

Signed-off-by: Ingo Molnar
Reviewed-by: Thomas Gleixner
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 97f736b749c..47e3717a035 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -863,7 +863,7 @@ struct rq;
 struct sched_domain;
 
 struct sched_class {
-	struct sched_class *next;
+	const struct sched_class *next;
 
 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
@@ -949,7 +949,7 @@ struct task_struct {
 	int prio, static_prio, normal_prio;
 	struct list_head run_list;
-	struct sched_class *sched_class;
+	const struct sched_class *sched_class;
 	struct sched_entity se;
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
--
cgit v1.2.3

From af92723262f3e0c431083f668b605a1dcdbe8f3d Mon Sep 17 00:00:00 2001
From: Mike Galbraith
Date: Mon, 15 Oct 2007 17:00:13 +0200
Subject: sched: cleanup, remove the TASK_NONINTERACTIVE flag

Here's another piece of low hanging obsolete fruit.

Remove obsolete TASK_NONINTERACTIVE.

Signed-off-by: Mike Galbraith
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 47e3717a035..49c7b374eac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -175,8 +175,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define EXIT_ZOMBIE		16
 #define EXIT_DEAD		32
 /* in tsk->state again */
-#define TASK_NONINTERACTIVE	64
-#define TASK_DEAD		128
+#define TASK_DEAD		64
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
--
cgit v1.2.3
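
[ Editor's note: why the "mark scheduling classes as const" patch
  above moves bytes from the data segment -- a generic C
  illustration, not kernel code: ]

struct ops {
	int (*run)(int);
};

static int run_impl(int x)
{
	return x + 1;
}

/* a const method table can be placed in read-only memory (.rodata)
 * and never needs to be reloaded after unrelated stores -- note the
 * data-segment shrink in the size stats quoted above */
static const struct ops my_ops = { .run = run_impl };

int use_ops(int x)
{
	return my_ops.run(x);
}
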
From 4cf86d77f5942336e7cd9de874b38b3c83b54d5e Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:14 +0200
Subject: sched: cleanup: rename task_grp to task_group

cleanup: rename task_grp to task_group. No need to save two
characters and 'grp' is annoying to read.

Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49c7b374eac..3cddbfc0c91 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,7 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
-struct task_grp;
+struct task_group;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -598,7 +598,7 @@ struct user_struct {
 	uid_t uid;
 
 #ifdef CONFIG_FAIR_USER_SCHED
-	struct task_grp *tg;
+	struct task_group *tg;
 #endif
 };
 
@@ -1842,12 +1842,12 @@ extern void normalize_rt_tasks(void);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-extern struct task_grp init_task_grp;
+extern struct task_group init_task_group;
 
-extern struct task_grp *sched_create_group(void);
-extern void sched_destroy_group(struct task_grp *tg);
+extern struct task_group *sched_create_group(void);
+extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
-extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
 #endif
--
cgit v1.2.3

From 5cb350baf580017da38199625b7365b1763d7180 Mon Sep 17 00:00:00 2001
From: Dhaval Giani
Date: Mon, 15 Oct 2007 17:00:14 +0200
Subject: sched: group scheduling, sysfs tunables

Add tunables in sysfs to modify a user's cpu share.

A directory is created in sysfs for each new user in the system.

	/sys/kernel/uids/<uid>/cpu_share

Reading this file returns the cpu shares granted for the user.
Writing into this file modifies the cpu share for the user. Only an
administrator is allowed to modify a user's cpu share.

Ex:

	# cd /sys/kernel/uids/
	# cat 512/cpu_share
	1024
	# echo 2048 > 512/cpu_share
	# cat 512/cpu_share
	2048
	#

Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Dhaval Giani
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cddbfc0c91..04233c8974d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -87,6 +87,7 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/kobject.h>
 
 #include <asm/processor.h>
@@ -599,9 +600,18 @@ struct user_struct {
 
 #ifdef CONFIG_FAIR_USER_SCHED
 	struct task_group *tg;
+	struct kset kset;
+	struct subsys_attribute user_attr;
+	struct work_struct work;
 #endif
 };
 
+#ifdef CONFIG_FAIR_USER_SCHED
+extern int uids_kobject_init(void);
+#else
+static inline int uids_kobject_init(void) { return 0; }
+#endif
+
 extern struct user_struct *find_user(uid_t);
 
 extern struct user_struct root_user;
@@ -1848,6 +1858,7 @@ extern struct task_group *sched_create_group(void);
 extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+extern unsigned long sched_group_shares(struct task_group *tg);
 
 #endif
--
cgit v1.2.3
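
[ Editor's note: what writing a cpu_share value means in practice --
  shares are relative weights, so a user's CPU fraction under full
  load is its share divided by the sum of all runnable users' shares;
  a runnable user-space illustration: ]

#include <stdio.h>

int main(void)
{
	/* e.g. uid 512 boosted to 2048, as in the example above */
	unsigned long shares[] = { 1024, 2048, 1024 };
	unsigned long sum = 0;
	unsigned int i;

	for (i = 0; i < 3; i++)
		sum += shares[i];
	for (i = 0; i < 3; i++)
		printf("user %u: %.1f%% of CPU\n", i,
		       100.0 * shares[i] / sum);
	return 0;
}
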
From 95938a35c5562afa7af7252821e44132391a3db8 Mon Sep 17 00:00:00 2001
From: Mike Galbraith
Date: Mon, 15 Oct 2007 17:00:14 +0200
Subject: sched: prevent wakeup over-scheduling

Prevent wakeup over-scheduling. Once a task has been preempted by a
task of the same or lower priority, it becomes ineligible for further
such preemption until it has been ticked, or slept. Instead, the task
is marked for preemption at the next tick. Tasks of higher priority
still preempt immediately.

Signed-off-by: Mike Galbraith
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04233c8974d..8be5b57768c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -912,6 +912,7 @@ struct sched_entity {
 	struct load_weight load;	/* for load-balancing */
 	struct rb_node run_node;
 	unsigned int on_rq;
+	int peer_preempt;
 
 	u64 exec_start;
 	u64 sum_exec_runtime;
--
cgit v1.2.3

From da84d96176729fb48a8458561e5d8647103168b8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:18 +0200
Subject: sched: reintroduce cache-hot affinity

reintroduce a simplified version of cache-hot/cold scheduling
affinity. This improves performance with certain SMP workloads,
such as sysbench.

Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8be5b57768c..fcc9a5ada1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1415,6 +1415,7 @@ extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
+extern unsigned int sysctl_sched_migration_cost;
 #endif
 
 extern unsigned int sysctl_sched_compat_yield;
--
cgit v1.2.3

From cc367732ff0b1c63d0d7bdd11e6d1661794ef6a3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:18 +0200
Subject: sched: debug, improve migration statistics

add new migration statistics when SCHED_DEBUG and SCHEDSTATS are
enabled. Available in /proc/<pid>/sched.

Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fcc9a5ada1a..3a6e05e7771 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -931,6 +931,24 @@ struct sched_entity {
 	u64 block_max;
 	u64 exec_max;
 	u64 slice_max;
+
+	u64 nr_migrations;
+	u64 nr_migrations_cold;
+	u64 nr_failed_migrations_affine;
+	u64 nr_failed_migrations_running;
+	u64 nr_failed_migrations_hot;
+	u64 nr_forced_migrations;
+	u64 nr_forced2_migrations;
+
+	u64 nr_wakeups;
+	u64 nr_wakeups_sync;
+	u64 nr_wakeups_migrate;
+	u64 nr_wakeups_local;
+	u64 nr_wakeups_remote;
+	u64 nr_wakeups_affine;
+	u64 nr_wakeups_affine_attempts;
+	u64 nr_wakeups_passive;
+	u64 nr_wakeups_idle;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
--
cgit v1.2.3
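
[ Editor's note: a sketch of the cache-hot test behind
  sysctl_sched_migration_cost from the patch above -- simplified from
  the idea, with an assumed 0.5 ms default; not the kernel function: ]

#include <stdint.h>

/* assumed default: tasks that ran within the last 0.5 ms are
 * considered cache-hot and are skipped by load balancing */
static uint64_t sysctl_sched_migration_cost = 500000ULL;

static int task_hot(uint64_t now_ns, uint64_t exec_start_ns)
{
	return now_ns - exec_start_ns < sysctl_sched_migration_cost;
}
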
From 95dbb421d12fdd9796ed153853daf3679809274f Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:19 +0200
Subject: sched: reintroduce topology.h tunings

reintroduce the 2.6.22 topology.h tunings again - they result in
slightly better balancing.

Signed-off-by: Ingo Molnar
---
 include/linux/topology.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 525d437b125..865a63e6557 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -98,7 +98,7 @@
 	.cache_nice_tries	= 0,			\
 	.busy_idx		= 0,			\
 	.idle_idx		= 0,			\
-	.newidle_idx		= 0,			\
+	.newidle_idx		= 1,			\
 	.wake_idx		= 0,			\
 	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
@@ -128,15 +128,14 @@
 	.imbalance_pct		= 125,			\
 	.cache_nice_tries	= 1,			\
 	.busy_idx		= 2,			\
-	.idle_idx		= 0,			\
-	.newidle_idx		= 0,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
-				| SD_WAKE_IDLE		\
 				| SD_SHARE_PKG_RESOURCES\
 				| BALANCE_FOR_MC_POWER,	\
 	.last_balance		= jiffies,		\
@@ -159,15 +158,14 @@
 	.imbalance_pct		= 125,			\
 	.cache_nice_tries	= 1,			\
 	.busy_idx		= 2,			\
-	.idle_idx		= 0,			\
-	.newidle_idx		= 0,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
-				| SD_WAKE_IDLE		\
 				| BALANCE_FOR_PKG_POWER,\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
--
cgit v1.2.3

From 7a6c6bcee029a978f866511d6e41dbc7301fde4c Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 15 Oct 2007 17:00:19 +0200
Subject: sched: enable wake-idle on CONFIG_SCHED_MC=y

most multicore CPUs today have shared L2 caches, so tune things so
that the spreading amongst cores is more aggressive.

Signed-off-by: Ingo Molnar
---
 include/linux/topology.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 865a63e6557..47729f18bfd 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -98,7 +98,7 @@
 	.cache_nice_tries	= 0,			\
 	.busy_idx		= 0,			\
 	.idle_idx		= 0,			\
-	.newidle_idx		= 1,			\
+	.newidle_idx		= 0,			\
 	.wake_idx		= 0,			\
 	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
@@ -128,14 +128,15 @@
 	.imbalance_pct		= 125,			\
 	.cache_nice_tries	= 1,			\
 	.busy_idx		= 2,			\
-	.idle_idx		= 1,			\
-	.newidle_idx		= 2,			\
+	.idle_idx		= 0,			\
+	.newidle_idx		= 0,			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
+				| SD_WAKE_IDLE		\
 				| SD_SHARE_PKG_RESOURCES\
 				| BALANCE_FOR_MC_POWER,	\
 	.last_balance		= jiffies,		\
--
cgit v1.2.3

From 5e84cfde51cf303d368fcb48f22059f37b3872de Mon Sep 17 00:00:00 2001
From: Laurent Vivier
Date: Mon, 15 Oct 2007 17:00:19 +0200
Subject: sched: guest CPU accounting: add guest-CPU /proc/stat field

as recent CPUs introduce a third running state, after "user" and
"system", we need a new field, "guest", in cpustat to store the time
used by the CPU to run a virtual CPU.

Modify /proc/stat to display this new field.

Signed-off-by: Laurent Vivier
Acked-by: Avi Kivity
Signed-off-by: Ingo Molnar
---
 include/linux/kernel_stat.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 43e895f1cab..12bf44f083f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -23,6 +23,7 @@ struct cpu_usage_stat {
 	cputime64_t idle;
 	cputime64_t iowait;
 	cputime64_t steal;
+	cputime64_t guest;
 };
 
 struct kernel_stat {
--
cgit v1.2.3

From 9ac52315d4cf5f561f36dabaf0720c00d3553162 Mon Sep 17 00:00:00 2001
From: Laurent Vivier
Date: Mon, 15 Oct 2007 17:00:19 +0200
Subject: sched: guest CPU accounting: add guest-CPU /proc/<pid>/stat
 fields

like for cpustat, introduce the "gtime" (guest time of the task) and
"cgtime" (guest time of the task's children) fields for the tasks.

Modify signal_struct and task_struct.

Modify /proc/<pid>/stat to display these new fields.

Signed-off-by: Laurent Vivier
Acked-by: Avi Kivity
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3a6e05e7771..fefce22e4c1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -517,6 +517,8 @@ struct signal_struct {
 	 * in __exit_signal, except for the group leader.
 	 */
 	cputime_t utime, stime, cutime, cstime;
+	cputime_t gtime;
+	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
@@ -1048,6 +1050,7 @@ struct task_struct {
 	unsigned int rt_priority;
 
 	cputime_t utime, stime;
+	cputime_t gtime;
 	unsigned long nvcsw, nivcsw;	/* context switch counts */
 	struct timespec start_time;		/* monotonic time */
 	struct timespec real_start_time;	/* boot based time */
--
cgit v1.2.3

From 94886b84b1bcdc95f34f70e7fce407efefe472e1 Mon Sep 17 00:00:00 2001
From: Laurent Vivier
Date: Mon, 15 Oct 2007 17:00:19 +0200
Subject: sched: guest CPU accounting: maintain stats in
 account_system_time()

modify account_system_time() to add cputime to cpustat->guest if we
are running a VCPU. We add this cputime to cpustat->user instead of
cpustat->system because this part of KVM code is in fact user code,
although it is executed in the kernel.

We duplicate VCPU time between guest and user to allow an unmodified
"top(1)" to display correct values. A modified "top(1)" is able to
display good cpu user time and cpu guest time by subtracting cpu
guest time from cpu user time.

Update "gtime" in task_struct accordingly.

Signed-off-by: Laurent Vivier
Acked-by: Avi Kivity
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fefce22e4c1..228e0a8ce24 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1342,6 +1342,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_STARTING	0x00000002	/* being created */
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
+#define PF_VCPU		0x00000010	/* I'm a virtual CPU */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
--
cgit v1.2.3
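
[ Editor's note: the accounting rule the three guest-CPU patches
  describe, condensed into a user-space sketch -- the types and the
  helper are illustrative, but the user/guest duplication mirrors the
  commit message above: ]

#include <stdint.h>

#define PF_VCPU 0x00000010	/* task is running a virtual CPU */

struct cpu_stat { uint64_t user, system, guest; };
struct task { unsigned int flags; uint64_t utime, gtime; };

/* charge 'cputime' spent running a vcpu: it is duplicated into user
 * time so an unmodified top(1) still adds up, and also tracked as
 * guest time so a modified top(1) can subtract it back out */
static void account_guest_time(struct task *p, struct cpu_stat *stat,
			       uint64_t cputime)
{
	if (!(p->flags & PF_VCPU))
		return;
	p->utime += cputime;
	p->gtime += cputime;
	stat->user += cputime;
	stat->guest += cputime;
}
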