From 01e3eb82278bf45221fc38b391bc5ee0f6a314d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 13:00:50 +0100 Subject: Revert "sched: improve preempt debugging" This reverts commit 7317d7b87edb41a9135e30be1ec3f7ef817c53dd. This has been reported (and bisected) by Alexey Zaytsev and Kamalesh Babulal to produce annoying warnings during bootup on both x86 and powerpc. kernel_locked() is not a valid test in IRQ context (we update the BKL's ->lock_depth and the preempt count separately and non-atomicalyy), so we cannot put it into the generic preempt debugging checks which can run in IRQ contexts too. Reported-and-bisected-by: Alexey Zaytsev Reported-and-bisected-by: Kamalesh Babulal Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13b50d..3b630d88266 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4440,7 +4440,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) return; /* * Is the spinlock portion underflowing? -- cgit v1.2.3 From 5add95d4f7cf08f6f62510f19576992912387501 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:08 +0100 Subject: [CVE-2009-0029] System call wrappers part 06 Signed-off-by: Heiko Carstens --- kernel/sched.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13b50d..1a0fdfa5ddf 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5126,7 +5126,7 @@ int can_nice(const struct task_struct *p, const int nice) * sys_setpriority is a more generic, but much slower function that * does similar things. */ -asmlinkage long sys_nice(int increment) +SYSCALL_DEFINE1(nice, int, increment) { long nice, retval; @@ -5433,8 +5433,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @policy: new policy. * @param: structure containing the new RT priority. */ -asmlinkage long -sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) +SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, + struct sched_param __user *, param) { /* negative values for policy are not valid */ if (policy < 0) @@ -5448,7 +5448,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @pid: the pid in question. * @param: structure containing the new RT priority. */ -asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) { return do_sched_setscheduler(pid, -1, param); } @@ -5457,7 +5457,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) * sys_sched_getscheduler - get the policy (scheduling class) of a thread * @pid: the pid in question. */ -asmlinkage long sys_sched_getscheduler(pid_t pid) +SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) { struct task_struct *p; int retval; @@ -5482,7 +5482,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) * @pid: the pid in question. * @param: structure containing the RT priority. */ -asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { struct sched_param lp; struct task_struct *p; @@ -5600,8 +5600,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to the new cpu mask */ -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { cpumask_var_t new_mask; int retval; @@ -5648,8 +5648,8 @@ out_unlock: * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask */ -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { int ret; cpumask_var_t mask; @@ -5678,7 +5678,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, * This function yields the current CPU to other tasks. If there are no * other threads running on this CPU then this function will return. */ -asmlinkage long sys_sched_yield(void) +SYSCALL_DEFINE0(sched_yield) { struct rq *rq = this_rq_lock(); @@ -5819,7 +5819,7 @@ long __sched io_schedule_timeout(long timeout) * this syscall returns the maximum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_max(int policy) +SYSCALL_DEFINE1(sched_get_priority_max, int, policy) { int ret = -EINVAL; @@ -5844,7 +5844,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) * this syscall returns the minimum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_min(int policy) +SYSCALL_DEFINE1(sched_get_priority_min, int, policy) { int ret = -EINVAL; -- cgit v1.2.3 From 754fe8d297bfae7b77f7ce866e2fb0c5fb186506 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:09 +0100 Subject: [CVE-2009-0029] System call wrappers part 07 Signed-off-by: Heiko Carstens --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 1a0fdfa5ddf..65c02037b05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,8 +5869,8 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -asmlinkage -long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) +SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, + struct timespec __user *, interval) { struct task_struct *p; unsigned int time_slice; -- cgit v1.2.3 From 17da2bd90abf428523de0fb98f7075e00e3ed42e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:10 +0100 Subject: [CVE-2009-0029] System call wrappers part 08 Signed-off-by: Heiko Carstens --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 65c02037b05..eb1931eef58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,7 +5869,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { struct task_struct *p; -- cgit v1.2.3 From 98a4826b99bc4bcc34c604b2fc4fcf4d771600ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2009 10:56:32 +0100 Subject: sched: fix bandwidth validation for UID grouping Impact: make rt-limit tunables work again Mark Glines reported: > I've got an issue on x86-64 where I can't configure the system to allow > RT tasks for a non-root user. > > In 2.6.26.5, I was able to do the following to set things up nicely: > echo 450000 >/sys/kernel/uids/0/cpu_rt_runtime > echo 450000 >/sys/kernel/uids/1000/cpu_rt_runtime > > Seems like every value I try to echo into the /sys files returns EINVAL. For UID grouping we initialize the root group with infinite bandwidth which by default is actually more than the global limit, therefore the bandwidth check always fails. Because the root group is a phantom group (for UID grouping) we cannot runtime adjust it, therefore we let it reflect the global bandwidth settings. Reported-by: Mark Glines Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 3b630d88266..ed62d1cee05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9050,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data) runtime = d->rt_runtime; } +#ifdef CONFIG_USER_SCHED + if (tg == &root_task_group) { + period = global_rt_period(); + runtime = global_rt_runtime(); + } +#endif + /* * Cannot have more runtime than the period. */ -- cgit v1.2.3 From cce7ade803699463ecc62a065ca522004f7ccb3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:37 +0100 Subject: sched: SCHED_IDLE weight change Increase the SCHED_IDLE weight from 2 to 3, this gives much more stable vruntime numbers. time advanced in 100ms: weight=2 64765.988352 67012.881408 88501.412352 weight=3 35496.181411 34130.971298 35497.411573 Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index ed62d1cee05..6acfb3c2398 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1323,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) * slice expiry etc. */ -#define WEIGHT_IDLEPRIO 2 -#define WMULT_IDLEPRIO (1 << 31) +#define WEIGHT_IDLEPRIO 3 +#define WMULT_IDLEPRIO 1431655765 /* * Nice levels are multiplicative, with a gentle 10% change for every -- cgit v1.2.3