Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpu.c            |  4
-rw-r--r--  kernel/exit.c           |  2
-rw-r--r--  kernel/futex_compat.c   |  4
-rw-r--r--  kernel/irq/manage.c     | 11
-rw-r--r--  kernel/kmod.c           |  2
-rw-r--r--  kernel/power/Kconfig    | 41
-rw-r--r--  kernel/ptrace.c         |  1
-rw-r--r--  kernel/sched.c          | 16
-rw-r--r--  kernel/sched_debug.c    |  1
-rw-r--r--  kernel/sched_fair.c     | 45
-rw-r--r--  kernel/signal.c         | 19
-rw-r--r--  kernel/sys.c            |  3
-rw-r--r--  kernel/time/ntp.c       |  2
-rw-r--r--  kernel/user_namespace.c |  1
14 files changed, 98 insertions, 54 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 181ae708602..38033db8d8e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -273,7 +273,7 @@ int __cpuinit cpu_up(unsigned int cpu)
 	return err;
 }
 
-#ifdef CONFIG_SUSPEND_SMP
+#ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
@@ -334,4 +334,4 @@ void enable_nonboot_cpus(void)
 out:
 	mutex_unlock(&cpu_add_remove_lock);
 }
-#endif
+#endif /* CONFIG_PM_SLEEP_SMP */
diff --git a/kernel/exit.c b/kernel/exit.c
index 9578c1ae19c..06b24b3aa37 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -975,6 +975,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 
+	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
 	exit_mm(tsk);
@@ -996,7 +997,6 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (tsk->binfmt)
 		module_put(tsk->binfmt->module);
 
-	tsk->exit_code = code;
 	proc_exit_connector(tsk);
 	exit_task_namespaces(tsk);
 	exit_notify(tsk);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index f7921360efa..7e52eb051f2 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -61,10 +61,10 @@ void compat_exit_robust_list(struct task_struct *curr)
 	if (fetch_robust_entry(&upending, &pending,
			       &head->list_op_pending, &pip))
 		return;
-	if (upending)
+	if (pending)
 		handle_futex_death((void __user *)pending + futex_offset, curr, pip);
 
-	while (compat_ptr(uentry) != &head->list) {
+	while (entry != (struct robust_list __user *) &head->list) {
 		/*
 		 * A pending lock might already be on the list, so
 		 * dont process it twice:
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 853aefbd184..7230d914eaa 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -547,14 +547,11 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		 * We do this before actually registering it, to make sure that
 		 * a 'real' IRQ doesn't run in parallel with our fake
 		 */
-		if (irqflags & IRQF_DISABLED) {
-			unsigned long flags;
+		unsigned long flags;
 
-			local_irq_save(flags);
-			handler(irq, dev_id);
-			local_irq_restore(flags);
-		} else
-			handler(irq, dev_id);
+		local_irq_save(flags);
+		handler(irq, dev_id);
+		local_irq_restore(flags);
 	}
 #endif
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9809cc1f33d..c6a4f8aebeb 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -505,7 +505,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 	if (ret < 0)
 		goto out;
 
-	return call_usermodehelper_exec(sub_info, 1);
+	return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
 
 out:
 	call_usermodehelper_freeinfo(sub_info);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 412859f8d94..c8580a1e687 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -72,15 +72,10 @@ config PM_TRACE
 	  CAUTION: this option will cause your machine's real-time clock to be
 	  set to an invalid time after a resume.
 
-config SUSPEND_SMP_POSSIBLE
-	bool
-	depends on (X86 && !X86_VOYAGER) || (PPC64 && (PPC_PSERIES || PPC_PMAC))
-	depends on SMP
-	default y
-
-config SUSPEND_SMP
+config PM_SLEEP_SMP
 	bool
-	depends on SUSPEND_SMP_POSSIBLE && PM_SLEEP
+	depends on SUSPEND_SMP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
+	depends on PM_SLEEP
 	select HOTPLUG_CPU
 	default y
 
@@ -89,20 +84,46 @@ config PM_SLEEP
 	depends on SUSPEND || HIBERNATION
 	default y
 
+config SUSPEND_UP_POSSIBLE
+	bool
+	depends on (X86 && !X86_VOYAGER) || PPC || ARM || BLACKFIN || MIPS \
+		   || SUPERH || FRV
+	depends on !SMP
+	default y
+
+config SUSPEND_SMP_POSSIBLE
+	bool
+	depends on (X86 && !X86_VOYAGER) \
+		   || (PPC && (PPC_PSERIES || PPC_PMAC)) || ARM
+	depends on SMP
+	default y
+
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM
-	depends on !SMP || SUSPEND_SMP_POSSIBLE
+	depends on SUSPEND_UP_POSSIBLE || SUSPEND_SMP_POSSIBLE
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is
 	  powered and thus its contents are preserved, such as the
 	  suspend-to-RAM state (i.e. the ACPI S3 state).
 
+config HIBERNATION_UP_POSSIBLE
+	bool
+	depends on X86 || PPC64_SWSUSP || FRV || PPC32
+	depends on !SMP
+	default y
+
+config HIBERNATION_SMP_POSSIBLE
+	bool
+	depends on (X86 && !X86_VOYAGER) || PPC64_SWSUSP
+	depends on SMP
+	default y
+
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP
-	depends on ((X86 || PPC64_SWSUSP || FRV || PPC32) && !SMP) || SUSPEND_SMP_POSSIBLE
+	depends on HIBERNATION_UP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 82a558b655d..3eca7a55f2e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -233,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
+	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
 	write_lock_irq(&tasklist_lock);
 	/* protect against de_thread()->release_task() */
diff --git a/kernel/sched.c b/kernel/sched.c
index 9fe473a190d..deeb1f8e0c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,7 +668,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -684,10 +684,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
 	if (unlikely(tmp > WMULT_CONST))
-		tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
 			WMULT_SHIFT/2);
 	else
-		tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
@@ -858,7 +858,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
@@ -1587,6 +1586,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.wait_start_fair = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 	p->se.delta_exec = 0;
 	p->se.delta_fair_run = 0;
 	p->se.delta_fair_sleep = 0;
@@ -2511,7 +2511,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
+	if (*imbalance < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
@@ -2563,10 +2563,8 @@ small_imbalance:
 	pwr_move /= SCHED_LOAD_SCALE;
 
 	/* Move if we gain throughput */
-	if (pwr_move <= pwr_now)
-		goto out_balanced;
-
-	*imbalance = busiest_load_per_task;
+	if (pwr_move > pwr_now)
+		*imbalance = busiest_load_per_task;
 	}
 
 	return busiest;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ab18f45f2ab..c3ee38bd342 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -283,4 +283,5 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
 #endif
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ee3771850aa..892616bf2c7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -194,6 +194,8 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
+
+	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static inline void
@@ -205,6 +207,8 @@ __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
+
+	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -291,7 +295,7 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity)
 	/*
 	 * It will always fit into 'long':
 	 */
-	return (long) (tmp >> WMULT_SHIFT);
+	return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT));
 }
 
 static inline void
@@ -354,7 +358,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	delta_fair = calc_delta_fair(delta_exec, lw);
 	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
-	if (cfs_rq->sleeper_bonus > sysctl_sched_latency) {
+	if (cfs_rq->sleeper_bonus > sysctl_sched_min_granularity) {
 		delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
 		delta = min(delta, (unsigned long)(
 			(long)sysctl_sched_runtime_limit - curr->wait_runtime));
@@ -489,6 +493,9 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	unsigned long delta_fair;
 
+	if (unlikely(!se->wait_start_fair))
+		return;
+
 	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
 			(u64)(cfs_rq->fair_clock - se->wait_start_fair));
 
@@ -571,7 +578,6 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 	delta_fair = se->wait_runtime - prev_runtime;
 
 	/*
@@ -659,7 +665,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 		if (tsk->state & TASK_UNINTERRUPTIBLE)
 			se->block_start = rq_of(cfs_rq)->clock;
 		}
-		cfs_rq->wait_runtime -= se->wait_runtime;
 #endif
 	}
 	__dequeue_entity(cfs_rq, se);
@@ -673,11 +678,31 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			  struct sched_entity *curr, unsigned long granularity)
 {
 	s64 __delta = curr->fair_key - se->fair_key;
+	unsigned long ideal_runtime, delta_exec;
+
+	/*
+	 * ideal_runtime is compared against sum_exec_runtime, which is
+	 * walltime, hence do not scale.
+	 */
+	ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running,
+			(unsigned long)sysctl_sched_min_granularity);
+
+	/*
+	 * If we executed more than what the latency constraint suggests,
+	 * reduce the rescheduling granularity. This way the total latency
+	 * of how much a task is not scheduled converges to
+	 * sysctl_sched_latency:
+	 */
+	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+	if (delta_exec > ideal_runtime)
+		granularity = 0;
 
 	/*
 	 * Take scheduling granularity into account - do not
 	 * preempt the current task unless the best task has
 	 * a larger than sched_granularity fairness advantage:
+	 *
+	 * scale granularity as key space is in fair_clock.
 	 */
 	if (__delta > niced_granularity(curr, granularity))
 		resched_task(rq_of(cfs_rq)->curr);
@@ -696,6 +721,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_stats_wait_end(cfs_rq, se);
 	update_stats_curr_start(cfs_rq, se);
 	set_cfs_rq_curr(cfs_rq, se);
+	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
@@ -1076,31 +1102,32 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
 static void task_new_fair(struct rq *rq, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
-	struct sched_entity *se = &p->se;
+	struct sched_entity *se = &p->se, *curr = cfs_rq_curr(cfs_rq);
 
 	sched_info_queued(p);
 
+	update_curr(cfs_rq);
 	update_stats_enqueue(cfs_rq, se);
 	/*
 	 * Child runs first: we let it run before the parent
 	 * until it reschedules once. We set up the key so that
 	 * it will preempt the parent:
 	 */
-	p->se.fair_key = current->se.fair_key -
-		niced_granularity(&rq->curr->se, sched_granularity(cfs_rq)) - 1;
+	se->fair_key = curr->fair_key -
+		niced_granularity(curr, sched_granularity(cfs_rq)) - 1;
 	/*
 	 * The first wait is dominated by the child-runs-first logic,
 	 * so do not credit it with that waiting time yet:
 	 */
 	if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL)
-		p->se.wait_start_fair = 0;
+		se->wait_start_fair = 0;
 
 	/*
 	 * The statistical average of wait_runtime is about
 	 * -granularity/2, so initialize the task with that:
 	 */
 	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
-		p->se.wait_runtime = -(sched_granularity(cfs_rq) / 2);
+		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
 	__enqueue_entity(cfs_rq, se);
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index ad63109e413..3169bed0b4d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1300,20 +1300,19 @@ struct sigqueue *sigqueue_alloc(void)
 void sigqueue_free(struct sigqueue *q)
 {
 	unsigned long flags;
+	spinlock_t *lock = &current->sighand->siglock;
+
 	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
 	/*
 	 * If the signal is still pending remove it from the
-	 * pending queue.
+	 * pending queue. We must hold ->siglock while testing
+	 * q->list to serialize with collect_signal().
 	 */
-	if (unlikely(!list_empty(&q->list))) {
-		spinlock_t *lock = &current->sighand->siglock;
-		read_lock(&tasklist_lock);
-		spin_lock_irqsave(lock, flags);
-		if (!list_empty(&q->list))
-			list_del_init(&q->list);
-		spin_unlock_irqrestore(lock, flags);
-		read_unlock(&tasklist_lock);
-	}
+	spin_lock_irqsave(lock, flags);
+	if (!list_empty(&q->list))
+		list_del_init(&q->list);
+	spin_unlock_irqrestore(lock, flags);
+
 	q->flags &= ~SIGQUEUE_PREALLOC;
 	__sigqueue_free(q);
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 449b81b98b3..1b33b05d346 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1442,7 +1442,6 @@ asmlinkage long sys_times(struct tms __user * tbuf)
  * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
  * LBT 04.03.94
  */
-
 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 {
 	struct task_struct *p;
@@ -1470,7 +1469,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 	if (!thread_group_leader(p))
 		goto out;
 
-	if (p->real_parent == group_leader) {
+	if (p->real_parent->tgid == group_leader->tgid) {
 		err = -EPERM;
 		if (task_session(p) != task_session(group_leader))
 			goto out;
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cd91237dbfe..de6a2d6b3eb 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -226,7 +226,7 @@ static void sync_cmos_clock(unsigned long dummy)
 
 static void notify_cmos_timer(void)
 {
-	if (no_sync_cmos_clock)
+	if (!no_sync_cmos_clock)
 		mod_timer(&sync_cmos_timer, jiffies + 1);
 }
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d055d987850..85af9422ea6 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -81,6 +81,7 @@ void free_user_ns(struct kref *kref)
 	struct user_namespace *ns;
 
 	ns = container_of(kref, struct user_namespace, kref);
+	free_uid(ns->root_user);
 	kfree(ns);
 }