From 50df5d6aea6694ca481b8005900401e8c95c2603 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 14 Mar 2008 16:09:59 +0100 Subject: sched: remove sysctl_sched_batch_wakeup_granularity it's unused. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6a1e7afb099..15f05ff453d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1551,7 +1551,6 @@ static inline void wake_up_idle_cpu(int cpu) { } extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_batch_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; -- cgit v1.2.3 From 57d3da2911787a101a384532f4519f9640bae883 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Feb 2008 14:05:10 +0100 Subject: time: add ns_to_ktime() Signed-off-by: Ingo Molnar --- include/linux/ktime.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2cd7fa73d1a..ce5983225be 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -327,4 +327,10 @@ extern void ktime_get_ts(struct timespec *ts); /* Get the real (wall-) time in timespec format: */ #define ktime_get_real_ts(ts) getnstimeofday(ts) +static inline ktime_t ns_to_ktime(u64 ns) +{ + static const ktime_t ktime_zero = { .tv64 = 0 }; + return ktime_add_ns(ktime_zero, ns); +} + #endif -- cgit v1.2.3 From d0b27fa77854b149ad4af08b0fe47fe712a47ade Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Apr 2008 19:44:57 +0200 Subject: sched: rt-group: synchonised bandwidth period Various SMP balancing algorithms require that the bandwidth period run in sync. Possible improvements are moving the rt_bandwidth thing into root_domain and keeping a span per rt_bandwidth which marks throttled cpus. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 15f05ff453d..be5d31752db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1563,6 +1563,10 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; +int sched_rt_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + extern unsigned int sysctl_sched_compat_yield; #ifdef CONFIG_RT_MUTEXES @@ -2052,6 +2056,9 @@ extern unsigned long sched_group_shares(struct task_group *tg); extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us); extern long sched_group_rt_runtime(struct task_group *tg); +extern int sched_group_set_rt_period(struct task_group *tg, + long rt_period_us); +extern long sched_group_rt_period(struct task_group *tg); #endif #endif -- cgit v1.2.3 From 30ca60c15a725f655e5d3f14e0238a066bc5aeb7 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 25 Mar 2008 15:06:55 -0700 Subject: cpumask: add cpumask_scnprintf_len function Add a new function cpumask_scnprintf_len() to return the number of characters needed to display "len" cpumask bits. The current method of allocating NR_CPUS bytes is incorrect as what's really needed is 9 characters per 32-bit word of cpumask bits (8 hex digits plus the seperator [','] or the terminating NULL.) This function provides the caller the means to allocate the correct string length. Cc: Paul Jackson Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/bitmap.h | 1 + include/linux/cpumask.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index acad1105d94..1dbe074f1c6 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -108,6 +108,7 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); +extern int bitmap_scnprintf_len(unsigned int len); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7047f58306a..67e0e38d32b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -273,6 +273,13 @@ static inline int __cpumask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } +#define cpumask_scnprintf_len(len) \ + __cpumask_scnprintf_len((len)) +static inline int __cpumask_scnprintf_len(int len) +{ + return bitmap_scnprintf_len(len); +} + #define cpumask_parse_user(ubuf, ulen, dst) \ __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) static inline int __cpumask_parse_user(const char __user *buf, int len, -- cgit v1.2.3 From f9a86fcbbb1e5542eabf45c9144ac4b6330861a4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:07 -0700 Subject: cpuset: modify cpuset_set_cpus_allowed to use cpumask pointer * Modify cpuset_cpus_allowed to return the currently allowed cpuset via a pointer argument instead of as the function return value. * Use new set_cpus_allowed_ptr function. * Cleanup CPU_MASK_ALL and NODE_MASK_ALL uses. Depends on: [sched-devel]: sched: add new set_cpus_allowed_ptr function Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 0a26be353cb..726761e2400 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,8 +20,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */ extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); -extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p); +extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask); +extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); @@ -84,13 +84,14 @@ static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) +static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask) { - return cpu_possible_map; + *mask = cpu_possible_map; } -static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p) +static inline void cpuset_cpus_allowed_locked(struct task_struct *p, + cpumask_t *mask) { - return cpu_possible_map; + *mask = cpu_possible_map; } static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) -- cgit v1.2.3 From b53e921ba1cff8453dc9a87a84052fa12d5b30bd Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:08 -0700 Subject: generic: reduce stack pressure in sched_affinity * Modify sched_affinity functions to pass cpumask_t variables by reference instead of by value. * Use new set_cpus_allowed_ptr function. Depends on: [sched-devel]: sched: add new set_cpus_allowed_ptr function Cc: Paul Jackson Cc: Cliff Wickman Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index be5d31752db..383502dfda1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2034,7 +2034,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif -extern long sched_setaffinity(pid_t pid, cpumask_t new_mask); +extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); extern int sched_mc_power_savings, sched_smt_power_savings; -- cgit v1.2.3 From 7c16ec585c558960a508ccf9a08fcb9ed49b3754 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:11 -0700 Subject: cpumask: reduce stack usage in SD_x_INIT initializers * Remove empty cpumask_t (and all non-zero/non-null) variables in SD_*_INIT macros. Use memset(0) to clear. Also, don't inline the initializer functions to save on stack space in build_sched_domains(). * Merge change to include/linux/topology.h that uses the new node_to_cpumask_ptr function in the nr_cpus_node macro into this patch. Depends on: [mm-patch]: asm-generic-add-node_to_cpumask_ptr-macro.patch [sched-devel]: sched: add new set_cpus_allowed_ptr function Cc: H. Peter Anvin Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/topology.h | 46 ++++++++-------------------------------------- 1 file changed, 8 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index bd14f8b30f0..4bb7074a2c3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -38,16 +38,15 @@ #endif #ifndef nr_cpus_node -#define nr_cpus_node(node) \ - ({ \ - cpumask_t __tmp__; \ - __tmp__ = node_to_cpumask(node); \ - cpus_weight(__tmp__); \ +#define nr_cpus_node(node) \ + ({ \ + node_to_cpumask_ptr(__tmp__, node); \ + cpus_weight(*__tmp__); \ }) #endif -#define for_each_node_with_cpus(node) \ - for_each_online_node(node) \ +#define for_each_node_with_cpus(node) \ + for_each_online_node(node) \ if (nr_cpus_node(node)) void arch_update_cpu_topology(void); @@ -80,7 +79,9 @@ void arch_update_cpu_topology(void); * by defining their own arch-specific initializer in include/asm/topology.h. * A definition there will automagically override these default initializers * and allow arch-specific performance tuning of sched_domains. + * (Only non-zero and non-null fields need be specified.) */ + #ifdef CONFIG_SCHED_SMT /* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, * so can't we drop this in favor of CONFIG_SCHED_SMT? @@ -89,20 +90,10 @@ void arch_update_cpu_topology(void); /* Common values for SMT siblings */ #ifndef SD_SIBLING_INIT #define SD_SIBLING_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ .min_interval = 1, \ .max_interval = 2, \ .busy_factor = 64, \ .imbalance_pct = 110, \ - .cache_nice_tries = 0, \ - .busy_idx = 0, \ - .idle_idx = 0, \ - .newidle_idx = 0, \ - .wake_idx = 0, \ - .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_FORK \ @@ -112,7 +103,6 @@ void arch_update_cpu_topology(void); | SD_SHARE_CPUPOWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ - .nr_balance_failed = 0, \ } #endif #endif /* CONFIG_SCHED_SMT */ @@ -121,18 +111,12 @@ void arch_update_cpu_topology(void); /* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */ #ifndef SD_MC_INIT #define SD_MC_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ .min_interval = 1, \ .max_interval = 4, \ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 1, \ .busy_idx = 2, \ - .idle_idx = 0, \ - .newidle_idx = 0, \ .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ @@ -144,7 +128,6 @@ void arch_update_cpu_topology(void); | BALANCE_FOR_MC_POWER, \ .last_balance = jiffies, \ .balance_interval = 1, \ - .nr_balance_failed = 0, \ } #endif #endif /* CONFIG_SCHED_MC */ @@ -152,10 +135,6 @@ void arch_update_cpu_topology(void); /* Common values for CPUs */ #ifndef SD_CPU_INIT #define SD_CPU_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ .min_interval = 1, \ .max_interval = 4, \ .busy_factor = 64, \ @@ -174,16 +153,11 @@ void arch_update_cpu_topology(void); | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ - .nr_balance_failed = 0, \ } #endif /* sched_domains SD_ALLNODES_INIT for NUMA machines */ #define SD_ALLNODES_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .child = NULL, \ - .groups = NULL, \ .min_interval = 64, \ .max_interval = 64*num_online_cpus(), \ .busy_factor = 128, \ @@ -191,14 +165,10 @@ void arch_update_cpu_topology(void); .cache_nice_tries = 1, \ .busy_idx = 3, \ .idle_idx = 3, \ - .newidle_idx = 0, /* unused */ \ - .wake_idx = 0, /* unused */ \ - .forkexec_idx = 0, /* unused */ \ .flags = SD_LOAD_BALANCE \ | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ - .nr_balance_failed = 0, \ } #ifdef CONFIG_NUMA -- cgit v1.2.3 From 321a8e9dcb714f3c350ba55e41ed447bf3f05fac Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:02 -0700 Subject: cpumask: add CPU_MASK_ALL_PTR macro * Add a static cpumask_t variable "CPU_MASK_ALL_PTR" to use as a pointer reference to CPU_MASK_ALL. This reduces where possible the instances where CPU_MASK_ALL allocates and fills a large array on the stack. Used only if NR_CPUS > BITS_PER_LONG. * Change init/main.c to use new set_cpus_allowed_ptr(). Depends on: [sched-devel]: sched: add new set_cpus_allowed_ptr function Cc: H. Peter Anvin Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 67e0e38d32b..629102feaa6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -243,6 +243,8 @@ int __next_cpu(int n, const cpumask_t *srcp); [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } } +#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) + #else #define CPU_MASK_ALL \ @@ -251,6 +253,10 @@ int __next_cpu(int n, const cpumask_t *srcp); [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } } +/* cpu_mask_all is in init/main.c */ +extern cpumask_t cpu_mask_all; +#define CPU_MASK_ALL_PTR (&cpu_mask_all) + #endif #define CPU_MASK_NONE \ -- cgit v1.2.3 From 9f0e8d0400d925c3acd5f4e01dbeb736e4011882 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 4 Apr 2008 18:11:01 -0700 Subject: x86: convert cpumask_of_cpu macro to allocated array * Here is a simple patch to use an allocated array of cpumasks to represent cpumask_of_cpu() instead of constructing one on the stack. It's based on the Kconfig option "HAVE_CPUMASK_OF_CPU_MAP" which is currently only set for x86_64 SMP. Otherwise the the existing cpumask_of_cpu() is used but has been changed to produce an lvalue so a pointer to it can be used. Cc: H. Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 629102feaa6..259c8051155 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -222,8 +222,13 @@ int __next_cpu(int n, const cpumask_t *srcp); #define next_cpu(n, src) ({ (void)(src); 1; }) #endif +#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP +extern cpumask_t *cpumask_of_cpu_map; +#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) + +#else #define cpumask_of_cpu(cpu) \ -({ \ +(*({ \ typeof(_unused_cpumask_arg_) m; \ if (sizeof(m) == sizeof(unsigned long)) { \ m.bits[0] = 1UL<<(cpu); \ @@ -231,8 +236,9 @@ int __next_cpu(int n, const cpumask_t *srcp); cpus_clear(m); \ cpu_set((cpu), m); \ } \ - m; \ -}) + &m; \ +})) +#endif #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) -- cgit v1.2.3 From 9d1fe3236a1d64ab687e16b4cbbaa1383352a2c1 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 8 Apr 2008 11:43:04 -0700 Subject: cpumask: add show cpu map functions * Add cpu_sysdev_class functions to display the following maps with cpulist_scnprintf(). cpu_online_map cpu_present_map cpu_possible_map * Small change to include/linux/sysdev.h to allow the attribute name and label to be different (to avoid collision with the "attr_online" entry for bringing cpus on- and off-line.) Cc: H. Peter Anvin Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/sysdev.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index f752e73bf97..f2767bc6b73 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -45,12 +45,16 @@ struct sysdev_class_attribute { ssize_t (*store)(struct sysdev_class *, const char *, size_t); }; -#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \ -struct sysdev_class_attribute attr_##_name = { \ +#define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \ +{ \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ -}; +} + +#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \ + struct sysdev_class_attribute attr_##_name = \ + _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) extern int sysdev_class_register(struct sysdev_class *); @@ -100,15 +104,16 @@ struct sysdev_attribute { }; -#define _SYSDEV_ATTR(_name,_mode,_show,_store) \ +#define _SYSDEV_ATTR(_name, _mode, _show, _store) \ { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } -#define SYSDEV_ATTR(_name,_mode,_show,_store) \ -struct sysdev_attribute attr_##_name = _SYSDEV_ATTR(_name,_mode,_show,_store); +#define SYSDEV_ATTR(_name, _mode, _show, _store) \ + struct sysdev_attribute attr_##_name = \ + _SYSDEV_ATTR(_name, _mode, _show, _store); extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *); extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *); -- cgit v1.2.3 From cd8ba7cd9be0192348c2836cb6645d9b2cd2bfd2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 26 Mar 2008 14:23:49 -0700 Subject: sched: add new set_cpus_allowed_ptr function Add a new function that accepts a pointer to the "newly allowed cpus" cpumask argument. int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) The current set_cpus_allowed() function is modified to use the above but this does not result in an ABI change. And with some compiler optimization help, it may not introduce any additional overhead. Additionally, to enforce the read only nature of the new_mask arg, the "const" property is migrated to sub-functions called by set_cpus_allowed. This silences compiler warnings. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- include/linux/sched.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 383502dfda1..79c025c3b62 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -889,7 +889,8 @@ struct sched_class { void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_new) (struct rq *rq, struct task_struct *p); - void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask); + void (*set_cpus_allowed)(struct task_struct *p, + const cpumask_t *newmask); void (*join_domain)(struct rq *rq); void (*leave_domain)(struct rq *rq); @@ -1502,15 +1503,21 @@ static inline void put_task_struct(struct task_struct *t) #define used_math() tsk_used_math(current) #ifdef CONFIG_SMP -extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, + const cpumask_t *new_mask); #else -static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, + const cpumask_t *new_mask) { - if (!cpu_isset(0, new_mask)) + if (!cpu_isset(0, *new_mask)) return -EINVAL; return 0; } #endif +static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) +{ + return set_cpus_allowed_ptr(p, &new_mask); +} extern unsigned long long sched_clock(void); -- cgit v1.2.3 From ec7dc8ac73e4a56ed03b673f026f08c0d547f597 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Sat, 19 Apr 2008 19:44:59 +0200 Subject: sched: allow the group scheduler to have multiple levels This patch makes the group scheduler multi hierarchy aware. [a.p.zijlstra@chello.nl: rt-parts and assorted fixes] Signed-off-by: Dhaval Giani Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 79c025c3b62..fa14781747c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2052,7 +2052,7 @@ extern void normalize_rt_tasks(void); extern struct task_group init_task_group; -extern struct task_group *sched_create_group(void); +extern struct task_group *sched_create_group(struct task_group *parent); extern void sched_destroy_group(struct task_group *tg); extern void sched_move_task(struct task_struct *tsk); #ifdef CONFIG_FAIR_GROUP_SCHED -- cgit v1.2.3 From eff766a65c60237bfa865160c3129de31fab591b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Apr 2008 19:45:00 +0200 Subject: sched: fix the task_group hierarchy for UID grouping UID grouping doesn't actually have a task_group representing the root of the task_group tree. Add one. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index fa14781747c..ada24022d23 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2051,6 +2051,9 @@ extern void normalize_rt_tasks(void); #ifdef CONFIG_GROUP_SCHED extern struct task_group init_task_group; +#ifdef CONFIG_USER_SCHED +extern struct task_group root_task_group; +#endif extern struct task_group *sched_create_group(struct task_group *parent); extern void sched_destroy_group(struct task_group *tg); -- cgit v1.2.3 From 1d3504fcf5606579d60b649d19f44b3871c1ddae Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 15 Apr 2008 14:04:23 +0900 Subject: sched, cpuset: customize sched domains, core [rebased for sched-devel/latest] - Add a new cpuset file, having levels: sched_relax_domain_level - Modify partition_sched_domains() and build_sched_domains() to take attributes parameter passed from cpuset. - Fill newidle_idx for node domains which currently unused but might be required if sched_relax_domain_level become higher. - We can change the default level by boot option 'relax_domain_level='. Signed-off-by: Hidetoshi Seto Signed-off-by: Ingo Molnar --- include/linux/sched.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ada24022d23..11f47249cdd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -704,6 +704,7 @@ enum cpu_idle_type { #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ +#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ #define BALANCE_FOR_MC_POWER \ (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) @@ -733,6 +734,24 @@ struct sched_group { u32 reciprocal_cpu_power; }; +enum sched_domain_level { + SD_LV_NONE = 0, + SD_LV_SIBLING, + SD_LV_MC, + SD_LV_CPU, + SD_LV_NODE, + SD_LV_ALLNODES, + SD_LV_MAX +}; + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + struct sched_domain { /* These fields must be setup */ struct sched_domain *parent; /* top domain must be null terminated */ @@ -750,6 +769,7 @@ struct sched_domain { unsigned int wake_idx; unsigned int forkexec_idx; int flags; /* See SD_* */ + enum sched_domain_level level; /* Runtime fields. */ unsigned long last_balance; /* init to jiffies. units in jiffies */ @@ -789,7 +809,8 @@ struct sched_domain { #endif }; -extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new); +extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, + struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 18d95a2832c1392a2d63227a7a6d433cb9f2037e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Apr 2008 19:45:00 +0200 Subject: sched: fair-group: SMP-nice for group scheduling Implement SMP nice support for the full group hierarchy. On each load-balance action, compile a sched_domain wide view of the full task_group tree. We compute the domain wide view when walking down the hierarchy, and readjust the weights when walking back up. After collecting and readjusting the domain wide view, we try to balance the tasks within the task_groups. The current approach is a naively balance each task group until we've moved the targeted amount of load. Inspired by Srivatsa Vaddsgiri's previous code and Abhishek Chandra's H-SMP paper. XXX: there will be some numerical issues due to the limited nature of SCHED_LOAD_SCALE wrt to representing a task_groups influence on the total weight. When the tree is deep enough, or the task weight small enough, we'll run out of bits. Signed-off-by: Peter Zijlstra CC: Abhishek Chandra CC: Srivatsa Vaddagiri Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 11f47249cdd..0a32059e6ed 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -758,6 +758,7 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ + int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.3 From 58d6c2d72f8628f39e8689fbde8aa177fcf00a37 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Apr 2008 19:45:00 +0200 Subject: sched: rt-group: optimize dequeue_rt_stack Now that the group hierarchy can have an arbitrary depth the O(n^2) nature of RT task dequeues will really hurt. Optimize this by providing space to store the tree path, so we can walk it the other way. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0a32059e6ed..887f5db8942 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1005,6 +1005,7 @@ struct sched_rt_entity { unsigned long timeout; int nr_cpus_allowed; + struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ -- cgit v1.2.3 From 4a55bd5e97b1775913f88f11108a4f144f590e89 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Apr 2008 19:45:00 +0200 Subject: sched: fair-group: de-couple load-balancing from the rb-trees De-couple load-balancing from the rb-trees, so that I can change their organization. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 3 +++ include/linux/sched.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1f74e1d7415..37a6f5bc4a9 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -151,6 +151,9 @@ extern struct group_info init_groups; .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ .active_mm = &init_mm, \ + .se = { \ + .group_node = LIST_HEAD_INIT(tsk.se.group_node), \ + }, \ .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = HZ, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 887f5db8942..be6914014c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -946,6 +946,7 @@ struct load_weight { struct sched_entity { struct load_weight load; /* for load-balancing */ struct rb_node run_node; + struct list_head group_node; unsigned int on_rq; u64 exec_start; -- cgit v1.2.3