Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/auditsc.c          |  24
-rw-r--r-- | kernel/exit.c             |   8
-rw-r--r-- | kernel/futex.c            |   1
-rw-r--r-- | kernel/irq/chip.c         |   5
-rw-r--r-- | kernel/irq/manage.c       |   2
-rw-r--r-- | kernel/irq/resend.c       |  12
-rw-r--r-- | kernel/kprobes.c          |   5
-rw-r--r-- | kernel/posix-timers.c     |   9
-rw-r--r-- | kernel/power/snapshot.c   |   3
-rw-r--r-- | kernel/printk.c           |  15
-rw-r--r-- | kernel/profile.c          |   4
-rw-r--r-- | kernel/sched.c            | 402
-rw-r--r-- | kernel/sched_debug.c      |  30
-rw-r--r-- | kernel/sched_fair.c       | 225
-rw-r--r-- | kernel/sched_idletask.c   |  10
-rw-r--r-- | kernel/sched_rt.c         |  48
-rw-r--r-- | kernel/signal.c           |   8
-rw-r--r-- | kernel/sysctl.c           |   3
-rw-r--r-- | kernel/time/clockevents.c |  10
19 files changed, 406 insertions, 418 deletions
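For orientation, the largest change below (kernel/sched.c and kernel/sched_fair.c) stops passing a cached "u64 now" timestamp through the scheduler: callers now invoke update_rq_clock()/__update_rq_clock() and read rq->clock directly. The same hunk also replaces truncating shifts in the per-weight arithmetic with a round-to-nearest shift, RSR(). The standalone C sketch below only illustrates that rounding idea; the main() harness and sample values are illustrative assumptions, not kernel code:

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Round-to-nearest right shift, mirroring the RSR() macro added to
     * kernel/sched.c in this diff: add half of 2^y before shifting so
     * the result is rounded instead of truncated.
     */
    #define RSR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

    int main(void)
    {
        uint64_t x = 1000;

        /* 1000 / 16 = 62.5 */
        printf("truncating shift: %llu\n", (unsigned long long)(x >> 4));  /* 62 */
        printf("rounding shift:   %llu\n", (unsigned long long)RSR(x, 4)); /* 63 */
        return 0;
    }

The new inv_weight computation in the same hunk, (WMULT_CONST - lw->weight/2) / lw->weight + 1, applies the same idea to the reciprocal: it rounds WMULT_CONST / weight to (roughly) the nearest integer instead of always truncating.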
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index a777d376141..04f3ffb8d9d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1992,19 +1992,19 @@ int __audit_signal_info(int sig, struct task_struct *t) extern uid_t audit_sig_uid; extern u32 audit_sig_sid; - if (audit_pid && t->tgid == audit_pid && - (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) { - audit_sig_pid = tsk->pid; - if (ctx) - audit_sig_uid = ctx->loginuid; - else - audit_sig_uid = tsk->uid; - selinux_get_task_sid(tsk, &audit_sig_sid); + if (audit_pid && t->tgid == audit_pid) { + if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { + audit_sig_pid = tsk->pid; + if (ctx) + audit_sig_uid = ctx->loginuid; + else + audit_sig_uid = tsk->uid; + selinux_get_task_sid(tsk, &audit_sig_sid); + } + if (!audit_signals || audit_dummy_context()) + return 0; } - if (!audit_signals) /* audit_context checked in wrapper */ - return 0; - /* optimize the common case by putting first signal recipient directly * in audit_context */ if (!ctx->target_pid) { @@ -2023,7 +2023,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->d.next = ctx->aux_pids; ctx->aux_pids = (void *)axp; } - BUG_ON(axp->pid_count > AUDIT_AUX_PIDS); + BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS); axp->target_pid[axp->pid_count] = t->tgid; selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]); diff --git a/kernel/exit.c b/kernel/exit.c index 464c2b172f0..9578c1ae19c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -813,7 +813,7 @@ static void exit_notify(struct task_struct *tsk) __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp); } - /* Let father know we died + /* Let father know we died * * Thread signals are configurable, but you aren't going to use * that to send signals to arbitary processes. @@ -826,9 +826,7 @@ static void exit_notify(struct task_struct *tsk) * If our self_exec id doesn't match our parent_exec_id then * we have changed execution domain as these two values started * the same after a fork. 
- * */ - if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && ( tsk->parent_exec_id != t->self_exec_id || tsk->self_exec_id != tsk->parent_exec_id) @@ -848,9 +846,7 @@ static void exit_notify(struct task_struct *tsk) } state = EXIT_ZOMBIE; - if (tsk->exit_signal == -1 && - (likely(tsk->ptrace == 0) || - unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT))) + if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) state = EXIT_DEAD; tsk->exit_state = state; diff --git a/kernel/futex.c b/kernel/futex.c index 3415e9ad139..e8935b195e8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1670,6 +1670,7 @@ pi_faulted: attempt); if (ret) goto out; + uval = 0; goto retry_unlocked; } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 615ce97c6cf..f1a73f0b54e 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -352,13 +352,10 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) * keep it masked and get out of here */ action = desc->action; - if (unlikely(!action || (desc->status & IRQ_DISABLED))) { - desc->status |= IRQ_PENDING; + if (unlikely(!action || (desc->status & IRQ_DISABLED))) goto out_unlock; - } desc->status |= IRQ_INPROGRESS; - desc->status &= ~IRQ_PENDING; spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 203a518b6f1..853aefbd184 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -462,7 +462,9 @@ void free_irq(unsigned int irq, void *dev_id) * We do this after actually deregistering it, to make sure that * a 'real' IRQ doesn't run in parallel with our fake */ + local_irq_save(flags); handler(irq, dev_id); + local_irq_restore(flags); } #endif } diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index c3827274688..a8046791ba2 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -63,15 +63,11 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) desc->chip->enable(irq); /* - * Temporary hack to figure out more about the problem, which - * is causing the ancient network cards to die. + * We do not resend level type interrupts. Level type + * interrupts are resent by hardware when they are still + * active. 
*/ - if (desc->handle_irq != handle_edge_irq) { - WARN_ON_ONCE(1); - return; - } - - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; if (!desc->chip || !desc->chip->retrigger || diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3e9f513a728..4b8a4493c54 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1063,6 +1063,11 @@ EXPORT_SYMBOL_GPL(register_kprobe); EXPORT_SYMBOL_GPL(unregister_kprobe); EXPORT_SYMBOL_GPL(register_jprobe); EXPORT_SYMBOL_GPL(unregister_jprobe); +#ifdef CONFIG_KPROBES EXPORT_SYMBOL_GPL(jprobe_return); +#endif + +#ifdef CONFIG_KPROBES EXPORT_SYMBOL_GPL(register_kretprobe); EXPORT_SYMBOL_GPL(unregister_kretprobe); +#endif diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 55b3761edaa..7a15afb73ed 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -547,9 +547,9 @@ sys_timer_create(const clockid_t which_clock, new_timer->it_process = process; list_add(&new_timer->list, &process->signal->posix_timers); - spin_unlock_irqrestore(&process->sighand->siglock, flags); if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) get_task_struct(process); + spin_unlock_irqrestore(&process->sighand->siglock, flags); } else { spin_unlock_irqrestore(&process->sighand->siglock, flags); process = NULL; @@ -605,13 +605,14 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id); if (timr) { spin_lock(&timr->it_lock); - spin_unlock(&idr_lock); if ((timr->it_id != timer_id) || !(timr->it_process) || timr->it_process->tgid != current->tgid) { - unlock_timer(timr, *flags); + spin_unlock(&timr->it_lock); + spin_unlock_irqrestore(&idr_lock, *flags); timr = NULL; - } + } else + spin_unlock(&idr_lock); } else spin_unlock_irqrestore(&idr_lock, *flags); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index a3b7854b8f7..a686590d88c 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -709,7 +709,8 @@ static void mark_nosave_pages(struct memory_bitmap *bm) region->end_pfn << PAGE_SHIFT); for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) - memory_bm_set_bit(bm, pfn); + if (pfn_valid(pfn)) + memory_bm_set_bit(bm, pfn); } } diff --git a/kernel/printk.c b/kernel/printk.c index 051d27e36a6..8451dfc31d2 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -732,7 +732,7 @@ int __init add_preferred_console(char *name, int idx, char *options) return 0; } -int __init update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) +int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options) { struct console_cmdline *c; int i; @@ -1083,6 +1083,19 @@ int unregister_console(struct console *console) } EXPORT_SYMBOL(unregister_console); +static int __init disable_boot_consoles(void) +{ + if (console_drivers != NULL) { + if (console_drivers->flags & CON_BOOT) { + printk(KERN_INFO "turn off boot console %s%d\n", + console_drivers->name, console_drivers->index); + return unregister_console(console_drivers); + } + } + return 0; +} +late_initcall(disable_boot_consoles); + /** * tty_write_message - write a message to a certain tty, not just the console. 
* @tty: the destination tty_struct diff --git a/kernel/profile.c b/kernel/profile.c index 5b20fe977be..cb1e37d2dac 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -199,11 +199,11 @@ EXPORT_SYMBOL_GPL(register_timer_hook); EXPORT_SYMBOL_GPL(unregister_timer_hook); EXPORT_SYMBOL_GPL(task_handoff_register); EXPORT_SYMBOL_GPL(task_handoff_unregister); +EXPORT_SYMBOL_GPL(profile_event_register); +EXPORT_SYMBOL_GPL(profile_event_unregister); #endif /* CONFIG_PROFILING */ -EXPORT_SYMBOL_GPL(profile_event_register); -EXPORT_SYMBOL_GPL(profile_event_unregister); #ifdef CONFIG_SMP /* diff --git a/kernel/sched.c b/kernel/sched.c index 72bb9483d94..45e17b83b7f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -263,6 +263,7 @@ struct rq { unsigned int clock_warps, clock_overflows; unsigned int clock_unstable_events; + u64 tick_timestamp; atomic_t nr_iowait; @@ -318,15 +319,19 @@ static inline int cpu_of(struct rq *rq) } /* - * Per-runqueue clock, as finegrained as the platform can give us: + * Update the per-runqueue clock, as finegrained as the platform can give + * us, but without assuming monotonicity, etc.: */ -static unsigned long long __rq_clock(struct rq *rq) +static void __update_rq_clock(struct rq *rq) { u64 prev_raw = rq->prev_clock_raw; u64 now = sched_clock(); s64 delta = now - prev_raw; u64 clock = rq->clock; +#ifdef CONFIG_SCHED_DEBUG + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); +#endif /* * Protect against sched_clock() occasionally going backwards: */ @@ -337,8 +342,11 @@ static unsigned long long __rq_clock(struct rq *rq) /* * Catch too large forward jumps too: */ - if (unlikely(delta > 2*TICK_NSEC)) { - clock++; + if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) { + if (clock < rq->tick_timestamp + TICK_NSEC) + clock = rq->tick_timestamp + TICK_NSEC; + else + clock++; rq->clock_overflows++; } else { if (unlikely(delta > rq->clock_max_delta)) @@ -349,18 +357,12 @@ static unsigned long long __rq_clock(struct rq *rq) rq->prev_clock_raw = now; rq->clock = clock; - - return clock; } -static inline unsigned long long rq_clock(struct rq *rq) +static void update_rq_clock(struct rq *rq) { - int this_cpu = smp_processor_id(); - - if (this_cpu == cpu_of(rq)) - return __rq_clock(rq); - - return rq->clock; + if (likely(smp_processor_id() == cpu_of(rq))) + __update_rq_clock(rq); } /* @@ -386,9 +388,12 @@ unsigned long long cpu_clock(int cpu) { unsigned long long now; unsigned long flags; + struct rq *rq; local_irq_save(flags); - now = rq_clock(cpu_rq(cpu)); + rq = cpu_rq(cpu); + update_rq_clock(rq); + now = rq->clock; local_irq_restore(flags); return now; @@ -637,6 +642,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor) #define WMULT_SHIFT 32 +/* + * Shift right and round: + */ +#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) + static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) @@ -644,18 +654,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, u64 tmp; if (unlikely(!lw->inv_weight)) - lw->inv_weight = WMULT_CONST / lw->weight; + lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1; tmp = (u64)delta_exec * weight; /* * Check whether we'd overflow the 64-bit multiplication: */ - if (unlikely(tmp > WMULT_CONST)) { - tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight) - >> (WMULT_SHIFT/2); - } else { - tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; - } + if (unlikely(tmp > WMULT_CONST)) + tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight, + WMULT_SHIFT/2); + else + tmp = 
RSR(tmp * lw->inv_weight, WMULT_SHIFT); return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } @@ -703,11 +712,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec) * the relative distance between them is ~25%.) */ static const int prio_to_weight[40] = { -/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, -/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280, -/* 0 */ NICE_0_LOAD /* 1024 */, -/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137, -/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, + /* -20 */ 88761, 71755, 56483, 46273, 36291, + /* -15 */ 29154, 23254, 18705, 14949, 11916, + /* -10 */ 9548, 7620, 6100, 4904, 3906, + /* -5 */ 3121, 2501, 1991, 1586, 1277, + /* 0 */ 1024, 820, 655, 526, 423, + /* 5 */ 335, 272, 215, 172, 137, + /* 10 */ 110, 87, 70, 56, 45, + /* 15 */ 36, 29, 23, 18, 15, }; /* @@ -718,14 +730,14 @@ static const int prio_to_weight[40] = { * into multiplications: */ static const u32 prio_to_wmult[40] = { -/* -20 */ 48356, 60446, 75558, 94446, 118058, -/* -15 */ 147573, 184467, 230589, 288233, 360285, -/* -10 */ 450347, 562979, 703746, 879575, 1099582, -/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, -/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, -/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, -/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, -/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, + /* -20 */ 48388, 59856, 76040, 92818, 118348, + /* -15 */ 147320, 184698, 229616, 287308, 360437, + /* -10 */ 449829, 563644, 704093, 875809, 1099582, + /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, + /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587, + /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, + /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, + /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); @@ -745,8 +757,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator); + int *this_best_prio, struct rq_iterator *iterator); #include "sched_stats.h" #include "sched_rt.c" @@ -782,14 +793,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls) * This function is called /before/ updating rq->ls.load * and when switching tasks. */ -static void update_curr_load(struct rq *rq, u64 now) +static void update_curr_load(struct rq *rq) { struct load_stat *ls = &rq->ls; u64 start; start = ls->load_update_start; - ls->load_update_start = now; - ls->delta_stat += now - start; + ls->load_update_start = rq->clock; + ls->delta_stat += rq->clock - start; /* * Stagger updates to ls->delta_fair. Very frequent updates * can be expensive. 
@@ -798,30 +809,28 @@ static void update_curr_load(struct rq *rq, u64 now) __update_curr_load(rq, ls); } -static inline void -inc_load(struct rq *rq, const struct task_struct *p, u64 now) +static inline void inc_load(struct rq *rq, const struct task_struct *p) { - update_curr_load(rq, now); + update_curr_load(rq); update_load_add(&rq->ls.load, p->se.load.weight); } -static inline void -dec_load(struct rq *rq, const struct task_struct *p, u64 now) +static inline void dec_load(struct rq *rq, const struct task_struct *p) { - update_curr_load(rq, now); + update_curr_load(rq); update_load_sub(&rq->ls.load, p->se.load.weight); } -static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) +static void inc_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running++; - inc_load(rq, p, now); + inc_load(rq, p); } -static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) +static void dec_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running--; - dec_load(rq, p, now); + dec_load(rq, p); } static void set_load_weight(struct task_struct *p) @@ -848,18 +857,16 @@ static void set_load_weight(struct task_struct *p) p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; } -static void -enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) { sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup, now); + p->sched_class->enqueue_task(rq, p, wakeup); p->se.on_rq = 1; } -static void -dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) { - p->sched_class->dequeue_task(rq, p, sleep, now); + p->sched_class->dequeue_task(rq, p, sleep); p->se.on_rq = 0; } @@ -914,13 +921,11 @@ static int effective_prio(struct task_struct *p) */ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) { - u64 now = rq_clock(rq); - if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - enqueue_task(rq, p, wakeup, now); - inc_nr_running(p, rq, now); + enqueue_task(rq, p, wakeup); + inc_nr_running(p, rq); } /* @@ -928,13 +933,13 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) */ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) { - u64 now = rq_clock(rq); + update_rq_clock(rq); if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - enqueue_task(rq, p, 0, now); - inc_nr_running(p, rq, now); + enqueue_task(rq, p, 0); + inc_nr_running(p, rq); } /* @@ -942,13 +947,11 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) */ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) { - u64 now = rq_clock(rq); - if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; - dequeue_task(rq, p, sleep, now); - dec_nr_running(p, rq, now); + dequeue_task(rq, p, sleep); + dec_nr_running(p, rq); } /** @@ -1516,6 +1519,7 @@ out_set_cpu: out_activate: #endif /* CONFIG_SMP */ + update_rq_clock(rq); activate_task(rq, p, 1); /* * Sync wakeups (i.e. 
those types of wakeups where the waker @@ -1647,12 +1651,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) unsigned long flags; struct rq *rq; int this_cpu; - u64 now; rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); /* parent's CPU */ - now = rq_clock(rq); + update_rq_clock(rq); p->prio = effective_prio(p); @@ -1666,8 +1669,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p, now); - inc_nr_running(p, rq, now); + p->sched_class->task_new(rq, p); + inc_nr_running(p, rq); } check_preempt_curr(rq, p); task_rq_unlock(rq, &flags); @@ -1954,7 +1957,6 @@ static void update_cpu_load(struct rq *this_rq) unsigned long total_load = this_rq->ls.load.weight; unsigned long this_load = total_load; struct load_stat *ls = &this_rq->ls; - u64 now = __rq_clock(this_rq); int i, scale; this_rq->nr_load_updates++; @@ -1962,7 +1964,7 @@ static void update_cpu_load(struct rq *this_rq) goto do_avg; /* Update delta_fair/delta_exec fields first */ - update_curr_load(this_rq, now); + update_curr_load(this_rq); fair_delta64 = ls->delta_fair + 1; ls->delta_fair = 0; @@ -1970,8 +1972,8 @@ static void update_cpu_load(struct rq *this_rq) exec_delta64 = ls->delta_exec + 1; ls->delta_exec = 0; - sample_interval64 = now - ls->load_update_last; - ls->load_update_last = now; + sample_interval64 = this_rq->clock - ls->load_update_last; + ls->load_update_last = this_rq->clock; if ((s64)sample_interval64 < (s64)TICK_NSEC) sample_interval64 = TICK_NSEC; @@ -2026,6 +2028,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) spin_lock(&rq1->lock); } } + update_rq_clock(rq1); + update_rq_clock(rq2); } /* @@ -2166,8 +2170,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { int pulled = 0, pinned = 0, skip_for_load; struct task_struct *p; @@ -2192,12 +2195,8 @@ next: */ skip_for_load = (p->se.load.weight >> 1) > rem_load_move + SCHED_LOAD_SCALE_FUZZ; - if (skip_for_load && p->prio < this_best_prio) - skip_for_load = !best_prio_seen && p->prio == best_prio; - if (skip_for_load || + if ((skip_for_load && p->prio >= *this_best_prio) || !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { - - best_prio_seen |= p->prio == best_prio; p = iterator->next(iterator->arg); goto next; } @@ -2211,8 +2210,8 @@ next: * and the prescribed amount of weighted load. */ if (pulled < max_nr_move && rem_load_move > 0) { - if (p->prio < this_best_prio) - this_best_prio = p->prio; + if (p->prio < *this_best_prio) + *this_best_prio = p->prio; p = iterator->next(iterator->arg); goto next; } @@ -2231,32 +2230,52 @@ out: } /* - * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted - * load from busiest to this_rq, as part of a balancing operation within - * "domain". Returns the number of tasks moved. + * move_tasks tries to move up to max_load_move weighted load from busiest to + * this_rq, as part of a balancing operation within domain "sd". + * Returns 1 if successful and 0 otherwise. * * Called with both runqueues locked. 
*/ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, + unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned) { struct sched_class *class = sched_class_highest; - unsigned long load_moved, total_nr_moved = 0, nr_moved; - long rem_load_move = max_load_move; + unsigned long total_load_moved = 0; + int this_best_prio = this_rq->curr->prio; do { - nr_moved = class->load_balance(this_rq, this_cpu, busiest, - max_nr_move, (unsigned long)rem_load_move, - sd, idle, all_pinned, &load_moved); - total_nr_moved += nr_moved; - max_nr_move -= nr_moved; - rem_load_move -= load_moved; + total_load_moved += + class->load_balance(this_rq, this_cpu, busiest, + ULONG_MAX, max_load_move - total_load_moved, + sd, idle, all_pinned, &this_best_prio); class = class->next; - } while (class && max_nr_move && rem_load_move > 0); + } while (class && max_load_move > total_load_moved); - return total_nr_moved; + return total_load_moved > 0; +} + +/* + * move_one_task tries to move exactly one task from busiest to this_rq, as + * part of active balancing operations within "domain". + * Returns 1 if successful and 0 otherwise. + * + * Called with both runqueues locked. + */ +static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) +{ + struct sched_class *class; + int this_best_prio = MAX_PRIO; + + for (class = sched_class_highest; class; class = class->next) + if (class->load_balance(this_rq, this_cpu, busiest, + 1, ULONG_MAX, sd, idle, NULL, + &this_best_prio)) + return 1; + + return 0; } /* @@ -2588,11 +2607,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ #define MAX_PINNED_INTERVAL 512 -static inline unsigned long minus_1_or_zero(unsigned long n) -{ - return n > 0 ? n - 1 : 0; -} - /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. @@ -2601,7 +2615,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, int *balance) { - int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; + int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; unsigned long imbalance; struct rq *busiest; @@ -2642,18 +2656,17 @@ redo: schedstat_add(sd, lb_imbalance[idle], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* * Attempt to move tasks. If find_busiest_group has found * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. nr_moved simply stays zero, so it is + * still unbalanced. ld_moved simply stays zero, so it is * correctly treated as an imbalance. */ local_irq_save(flags); double_rq_lock(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); local_irq_restore(flags); @@ -2661,7 +2674,7 @@ redo: /* * some other cpu did the load balance for us. 
*/ - if (nr_moved && this_cpu != smp_processor_id()) + if (ld_moved && this_cpu != smp_processor_id()) resched_cpu(this_cpu); /* All tasks on this runqueue were pinned by CPU affinity */ @@ -2673,7 +2686,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[idle]); sd->nr_balance_failed++; @@ -2722,10 +2735,10 @@ redo: sd->balance_interval *= 2; } - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && + if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[idle]); @@ -2757,7 +2770,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) struct sched_group *group; struct rq *busiest = NULL; unsigned long imbalance; - int nr_moved = 0; + int ld_moved = 0; int sd_idle = 0; int all_pinned = 0; cpumask_t cpus = CPU_MASK_ALL; @@ -2792,12 +2805,13 @@ redo: schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + /* this_rq->clock is already updated */ + update_rq_clock(busiest); + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, CPU_NEWLY_IDLE, &all_pinned); spin_unlock(&busiest->lock); @@ -2809,7 +2823,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) @@ -2817,7 +2831,7 @@ redo: } else sd->nr_balance_failed = 0; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); @@ -2894,6 +2908,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* move a task from busiest_rq to target_rq */ double_lock_balance(busiest_rq, target_rq); + update_rq_clock(busiest_rq); + update_rq_clock(target_rq); /* Search for an sd spanning us and the target CPU. 
*/ for_each_domain(target_cpu, sd) { @@ -2905,8 +2921,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) if (likely(sd)) { schedstat_inc(sd, alb_cnt); - if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - ULONG_MAX, sd, CPU_IDLE, NULL)) + if (move_one_task(target_rq, target_cpu, busiest_rq, + sd, CPU_IDLE)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); @@ -3090,7 +3106,7 @@ static void run_rebalance_domains(struct softirq_action *h) if (need_resched()) break; - rebalance_domains(balance_cpu, SCHED_IDLE); + rebalance_domains(balance_cpu, CPU_IDLE); rq = cpu_rq(balance_cpu); if (time_after(this_rq->next_balance, rq->next_balance)) @@ -3175,8 +3191,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { *load_moved = 0; @@ -3202,7 +3217,8 @@ unsigned long long task_sched_runtime(struct task_struct *p) rq = task_rq_lock(p, &flags); ns = p->se.sum_exec_runtime; if (rq->curr == p) { - delta_exec = rq_clock(rq) - p->se.exec_start; + update_rq_clock(rq); + delta_exec = rq->clock - p->se.exec_start; if ((s64)delta_exec > 0) ns += delta_exec; } @@ -3296,11 +3312,19 @@ void scheduler_tick(void) int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; + u64 next_tick = rq->tick_timestamp + TICK_NSEC; spin_lock(&rq->lock); + __update_rq_clock(rq); + /* + * Let rq->clock advance by at least TICK_NSEC: + */ + if (unlikely(rq->clock < next_tick)) + rq->clock = next_tick; + rq->tick_timestamp = rq->clock; + update_cpu_load(rq); if (curr != rq->idle) /* FIXME: needed? 
*/ curr->sched_class->task_tick(rq, curr); - update_cpu_load(rq); spin_unlock(&rq->lock); #ifdef CONFIG_SMP @@ -3382,7 +3406,7 @@ static inline void schedule_debug(struct task_struct *prev) * Pick up the highest-prio task: */ static inline struct task_struct * -pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) +pick_next_task(struct rq *rq, struct task_struct *prev) { struct sched_class *class; struct task_struct *p; @@ -3392,14 +3416,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) * the fair class we can call that function directly: */ if (likely(rq->nr_running == rq->cfs.nr_running)) { - p = fair_sched_class.pick_next_task(rq, now); + p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; } class = sched_class_highest; for ( ; ; ) { - p = class->pick_next_task(rq, now); + p = class->pick_next_task(rq); if (p) return p; /* @@ -3418,7 +3442,6 @@ asmlinkage void __sched schedule(void) struct task_struct *prev, *next; long *switch_count; struct rq *rq; - u64 now; int cpu; need_resched: @@ -3436,6 +3459,7 @@ need_resched_nonpreemptible: spin_lock_irq(&rq->lock); clear_tsk_need_resched(prev); + __update_rq_clock(rq); if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely((prev->state & TASK_INTERRUPTIBLE) && @@ -3450,9 +3474,8 @@ need_resched_nonpreemptible: if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); - now = __rq_clock(rq); - prev->sched_class->put_prev_task(rq, prev, now); - next = pick_next_task(rq, prev, now); + prev->sched_class->put_prev_task(rq, prev); + next = pick_next_task(rq, prev); sched_info_switch(prev, next); @@ -3895,17 +3918,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio) unsigned long flags; int oldprio, on_rq; struct rq *rq; - u64 now; BUG_ON(prio < 0 || prio > MAX_PRIO); rq = task_rq_lock(p, &flags); - now = rq_clock(rq); + update_rq_clock(rq); oldprio = p->prio; on_rq = p->se.on_rq; if (on_rq) - dequeue_task(rq, p, 0, now); + dequeue_task(rq, p, 0); if (rt_prio(prio)) p->sched_class = &rt_sched_class; @@ -3915,7 +3937,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) p->prio = prio; if (on_rq) { - enqueue_task(rq, p, 0, now); + enqueue_task(rq, p, 0); /* * Reschedule if we are currently running on this runqueue and * our priority decreased, or if we are not currently running on @@ -3938,7 +3960,6 @@ void set_user_nice(struct task_struct *p, long nice) int old_prio, delta, on_rq; unsigned long flags; struct rq *rq; - u64 now; if (TASK_NICE(p) == nice || nice < -20 || nice > 19) return; @@ -3947,7 +3968,7 @@ void set_user_nice(struct task_struct *p, long nice) * the task might be in the middle of scheduling on another CPU. 
*/ rq = task_rq_lock(p, &flags); - now = rq_clock(rq); + update_rq_clock(rq); /* * The RT priorities are set via sched_setscheduler(), but we still * allow the 'normal' nice value to be set - but as expected @@ -3960,8 +3981,8 @@ void set_user_nice(struct task_struct *p, long nice) } on_rq = p->se.on_rq; if (on_rq) { - dequeue_task(rq, p, 0, now); - dec_load(rq, p, now); + dequeue_task(rq, p, 0); + dec_load(rq, p); } p->static_prio = NICE_TO_PRIO(nice); @@ -3971,8 +3992,8 @@ void set_user_nice(struct task_struct *p, long nice) delta = p->prio - old_prio; if (on_rq) { - enqueue_task(rq, p, 0, now); - inc_load(rq, p, now); + enqueue_task(rq, p, 0); + inc_load(rq, p); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -4208,6 +4229,7 @@ recheck: spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } + update_rq_clock(rq); on_rq = p->se.on_rq; if (on_rq) deactivate_task(rq, p, 0); @@ -4463,10 +4485,8 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) out_unlock: read_unlock(&tasklist_lock); mutex_unlock(&sched_hotcpu_mutex); - if (retval) - return retval; - return 0; + return retval; } /** @@ -4966,6 +4986,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) on_rq = p->se.on_rq; if (on_rq) deactivate_task(rq_src, p, 0); + set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); @@ -5198,7 +5219,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu) for ( ; ; ) { if (!rq->nr_running) break; - next = pick_next_task(rq, rq->curr, rq_clock(rq)); + update_rq_clock(rq); + next = pick_next_task(rq, rq->curr); if (!next) break; migrate_dead(dead_cpu, next); @@ -5210,12 +5232,19 @@ static void migrate_dead_tasks(unsigned int dead_cpu) #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) static struct ctl_table sd_ctl_dir[] = { - {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, }, + { + .procname = "sched_domain", + .mode = 0755, + }, {0,}, }; static struct ctl_table sd_ctl_root[] = { - {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, }, + { + .procname = "kernel", + .mode = 0755, + .child = sd_ctl_dir, + }, {0,}, }; @@ -5231,11 +5260,10 @@ static struct ctl_table *sd_alloc_ctl_entry(int n) } static void -set_table_entry(struct ctl_table *entry, int ctl_name, +set_table_entry(struct ctl_table *entry, const char *procname, void *data, int maxlen, mode_t mode, proc_handler *proc_handler) { - entry->ctl_name = ctl_name; entry->procname = procname; entry->data = data; entry->maxlen = maxlen; @@ -5248,28 +5276,28 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) { struct ctl_table *table = sd_alloc_ctl_entry(14); - set_table_entry(&table[0], 1, "min_interval", &sd->min_interval, + set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[1], 2, "max_interval", &sd->max_interval, + set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx, + set_table_entry(&table[2], "busy_idx", &sd->busy_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx, + set_table_entry(&table[3], "idle_idx", &sd->idle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx, + set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[5], 6, "wake_idx", 
&sd->wake_idx, + set_table_entry(&table[5], "wake_idx", &sd->wake_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx, + set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor, + set_table_entry(&table[7], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, + set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[10], 11, "cache_nice_tries", + set_table_entry(&table[10], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[12], 13, "flags", &sd->flags, + set_table_entry(&table[12], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); return table; @@ -5289,7 +5317,6 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) i = 0; for_each_domain(cpu, sd) { snprintf(buf, 32, "domain%d", i); - entry->ctl_name = i + 1; entry->procname = kstrdup(buf, GFP_KERNEL); entry->mode = 0755; entry->child = sd_alloc_ctl_domain_table(sd); @@ -5310,7 +5337,6 @@ static void init_sched_domain_sysctl(void) for (i = 0; i < cpu_num; i++, entry++) { snprintf(buf, 32, "cpu%d", i); - entry->ctl_name = i + 1; entry->procname = kstrdup(buf, GFP_KERNEL); entry->mode = 0755; entry->child = sd_alloc_ctl_cpu_table(i); @@ -5379,6 +5405,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq->migration_thread = NULL; /* Idle task back to normal (off runqueue, low prio) */ rq = task_rq_lock(rq->idle, &flags); + update_rq_clock(rq); deactivate_task(rq, rq->idle, 0); rq->idle->static_prio = MAX_PRIO; __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); @@ -6301,7 +6328,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) +static int arch_reinit_sched_domains(void) { int err; @@ -6330,24 +6357,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) return ret ? 
ret : count; } -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) -{ - int err = 0; - -#ifdef CONFIG_SCHED_SMT - if (smt_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_smt_power_savings.attr); -#endif -#ifdef CONFIG_SCHED_MC - if (!err && mc_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_mc_power_savings.attr); -#endif - return err; -} -#endif - #ifdef CONFIG_SCHED_MC static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) { @@ -6358,8 +6367,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 0); } -SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); +static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT @@ -6372,8 +6381,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 1); } -SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, - sched_smt_power_savings_store); +static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, + sched_smt_power_savings_store); +#endif + +int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +{ + int err = 0; + +#ifdef CONFIG_SCHED_SMT + if (smt_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_smt_power_savings.attr); +#endif +#ifdef CONFIG_SCHED_MC + if (!err && mc_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_mc_power_savings.attr); +#endif + return err; +} #endif /* @@ -6616,12 +6643,13 @@ void normalize_rt_tasks(void) goto out_unlock; #endif + update_rq_clock(rq); on_rq = p->se.on_rq; if (on_rq) - deactivate_task(task_rq(p), p, 0); + deactivate_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { - activate_task(task_rq(p), p, 0); + activate_task(rq, p, 0); resched_task(rq->curr); } #ifdef CONFIG_SMP diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 1c61e5315ad..87e524762b8 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -29,34 +29,34 @@ } while (0) static void -print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now) +print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { if (rq->curr == p) SEQ_printf(m, "R"); else SEQ_printf(m, " "); - SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d " - "%15Ld %15Ld %15Ld %15Ld %15Ld\n", + SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d ", p->comm, p->pid, (long long)p->se.fair_key, (long long)(p->se.fair_key - rq->cfs.fair_clock), (long long)p->se.wait_runtime, (long long)(p->nvcsw + p->nivcsw), - p->prio, + p->prio); #ifdef CONFIG_SCHEDSTATS + SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", (long long)p->se.sum_exec_runtime, (long long)p->se.sum_wait_runtime, (long long)p->se.sum_sleep_runtime, (long long)p->se.wait_runtime_overruns, - (long long)p->se.wait_runtime_underruns + (long long)p->se.wait_runtime_underruns); #else - 0LL, 0LL, 0LL, 0LL, 0LL + SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", + 0LL, 0LL, 0LL, 0LL, 0LL); #endif - ); } -static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) +static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) { struct task_struct *g, *p; @@ -77,7 +77,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) if (!p->se.on_rq || task_cpu(p) != rq_cpu) continue; - print_task(m, rq, 
p, now); + print_task(m, rq, p); } while_each_thread(g, p); read_unlock_irq(&tasklist_lock); @@ -106,9 +106,9 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) (long long)wait_runtime_rq_sum); } -void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) +void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { - SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); + SEQ_printf(m, "\ncfs_rq\n"); #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) @@ -124,7 +124,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) print_cfs_rq_runtime_sum(m, cpu, cfs_rq); } -static void print_cpu(struct seq_file *m, int cpu, u64 now) +static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = &per_cpu(runqueues, cpu); @@ -166,9 +166,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) P(cpu_load[4]); #undef P - print_cfs_stats(m, cpu, now); + print_cfs_stats(m, cpu); - print_rq(m, rq, cpu, now); + print_rq(m, rq, cpu); } static int sched_debug_show(struct seq_file *m, void *v) @@ -184,7 +184,7 @@ static int sched_debug_show(struct seq_file *m, void *v) SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); for_each_online_cpu(cpu) - print_cpu(m, cpu, now); + print_cpu(m, cpu); SEQ_printf(m, "\n"); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6f579ff5a9b..fedbb51bba9 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -75,7 +75,7 @@ enum { unsigned int sysctl_sched_features __read_mostly = SCHED_FEAT_FAIR_SLEEPERS *1 | - SCHED_FEAT_SLEEPER_AVG *1 | + SCHED_FEAT_SLEEPER_AVG *0 | SCHED_FEAT_SLEEPER_LOAD_AVG *1 | SCHED_FEAT_PRECISE_CPU_LOAD *1 | SCHED_FEAT_START_DEBIT *1 | @@ -222,21 +222,25 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity) { u64 tmp; + if (likely(curr->load.weight == NICE_0_LOAD)) + return granularity; /* - * Negative nice levels get the same granularity as nice-0: + * Positive nice levels get the same granularity as nice-0: */ - if (likely(curr->load.weight >= NICE_0_LOAD)) - return granularity; + if (likely(curr->load.weight < NICE_0_LOAD)) { + tmp = curr->load.weight * (u64)granularity; + return (long) (tmp >> NICE_0_SHIFT); + } /* - * Positive nice level tasks get linearly finer + * Negative nice level tasks get linearly finer * granularity: */ - tmp = curr->load.weight * (u64)granularity; + tmp = curr->load.inv_weight * (u64)granularity; /* * It will always fit into 'long': */ - return (long) (tmp >> NICE_0_SHIFT); + return (long) (tmp >> WMULT_SHIFT); } static inline void @@ -281,31 +285,28 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta) * are not in our scheduling class. 
*/ static inline void -__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now) +__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) { - unsigned long delta, delta_exec, delta_fair; - long delta_mine; + unsigned long delta, delta_exec, delta_fair, delta_mine; struct load_weight *lw = &cfs_rq->load; unsigned long load = lw->weight; - if (unlikely(!load)) - return; - delta_exec = curr->delta_exec; schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max)); curr->sum_exec_runtime += delta_exec; cfs_rq->exec_clock += delta_exec; + if (unlikely(!load)) + return; + delta_fair = calc_delta_fair(delta_exec, lw); delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); - if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) { - delta = calc_delta_mine(cfs_rq->sleeper_bonus, - curr->load.weight, lw); - if (unlikely(delta > cfs_rq->sleeper_bonus)) - delta = cfs_rq->sleeper_bonus; - + if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) { + delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec); + delta = calc_delta_mine(delta, curr->load.weight, lw); + delta = min((u64)delta, cfs_rq->sleeper_bonus); cfs_rq->sleeper_bonus -= delta; delta_mine -= delta; } @@ -321,7 +322,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now) add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec); } -static void update_curr(struct cfs_rq *cfs_rq, u64 now) +static void update_curr(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq_curr(cfs_rq); unsigned long delta_exec; @@ -334,22 +335,22 @@ static void update_curr(struct cfs_rq *cfs_rq, u64 now) * since the last time we changed load (this cannot * overflow on 32 bits): */ - delta_exec = (unsigned long)(now - curr->exec_start); + delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start); curr->delta_exec += delta_exec; if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) { - __update_curr(cfs_rq, curr, now); + __update_curr(cfs_rq, curr); curr->delta_exec = 0; } - curr->exec_start = now; + curr->exec_start = rq_of(cfs_rq)->clock; } static inline void -update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { se->wait_start_fair = cfs_rq->fair_clock; - schedstat_set(se->wait_start, now); + schedstat_set(se->wait_start, rq_of(cfs_rq)->clock); } /* @@ -377,8 +378,7 @@ calc_weighted(unsigned long delta, unsigned long weight, int shift) /* * Task is being enqueued - update stats: */ -static void -update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) { s64 key; @@ -387,7 +387,7 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) * a dequeue/enqueue event is a NOP) */ if (se != cfs_rq_curr(cfs_rq)) - update_stats_wait_start(cfs_rq, se, now); + update_stats_wait_start(cfs_rq, se); /* * Update the key: */ @@ -407,7 +407,8 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) (WMULT_SHIFT - NICE_0_SHIFT); } else { tmp = se->wait_runtime; - key -= (tmp * se->load.weight) >> NICE_0_SHIFT; + key -= (tmp * se->load.inv_weight) >> + (WMULT_SHIFT - NICE_0_SHIFT); } } @@ -418,11 +419,12 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) * Note: must be called with a freshly updated rq->fair_clock. 
*/ static inline void -__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { unsigned long delta_fair = se->delta_fair_run; - schedstat_set(se->wait_max, max(se->wait_max, now - se->wait_start)); + schedstat_set(se->wait_max, max(se->wait_max, + rq_of(cfs_rq)->clock - se->wait_start)); if (unlikely(se->load.weight != NICE_0_LOAD)) delta_fair = calc_weighted(delta_fair, se->load.weight, @@ -432,7 +434,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) } static void -update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { unsigned long delta_fair; @@ -442,7 +444,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) se->delta_fair_run += delta_fair; if (unlikely(abs(se->delta_fair_run) >= sysctl_sched_stat_granularity)) { - __update_stats_wait_end(cfs_rq, se, now); + __update_stats_wait_end(cfs_rq, se); se->delta_fair_run = 0; } @@ -451,34 +453,34 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) } static inline void -update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) { - update_curr(cfs_rq, now); + update_curr(cfs_rq); /* * Mark the end of the wait period if dequeueing a * waiting task: */ if (se != cfs_rq_curr(cfs_rq)) - update_stats_wait_end(cfs_rq, se, now); + update_stats_wait_end(cfs_rq, se); } /* * We are picking a new current task - update its stats: */ static inline void -update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { /* * We are starting a new run period: */ - se->exec_start = now; + se->exec_start = rq_of(cfs_rq)->clock; } /* * We are descheduling a task - update its stats: */ static inline void -update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { se->exec_start = 0; } @@ -487,8 +489,7 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) * Scheduling class queueing methods: */ -static void -__enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { unsigned long load = cfs_rq->load.weight, delta_fair; long prev_runtime; @@ -518,12 +519,13 @@ __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) * Track the amount of bonus we've given to sleepers: */ cfs_rq->sleeper_bonus += delta_fair; + if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit)) + cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit; schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); } -static void -enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { struct task_struct *tsk = task_of(se); unsigned long delta_fair; @@ -538,7 +540,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) se->delta_fair_sleep += delta_fair; if (unlikely(abs(se->delta_fair_sleep) >= sysctl_sched_stat_granularity)) { - __enqueue_sleeper(cfs_rq, se, now); + __enqueue_sleeper(cfs_rq, se); se->delta_fair_sleep = 0; } @@ -546,7 +548,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, 
struct sched_entity *se, u64 now) #ifdef CONFIG_SCHEDSTATS if (se->sleep_start) { - u64 delta = now - se->sleep_start; + u64 delta = rq_of(cfs_rq)->clock - se->sleep_start; if ((s64)delta < 0) delta = 0; @@ -558,7 +560,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) se->sum_sleep_runtime += delta; } if (se->block_start) { - u64 delta = now - se->block_start; + u64 delta = rq_of(cfs_rq)->clock - se->block_start; if ((s64)delta < 0) delta = 0; @@ -573,26 +575,24 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) } static void -enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - int wakeup, u64 now) +enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) { /* * Update the fair clock. */ - update_curr(cfs_rq, now); + update_curr(cfs_rq); if (wakeup) - enqueue_sleeper(cfs_rq, se, now); + enqueue_sleeper(cfs_rq, se); - update_stats_enqueue(cfs_rq, se, now); + update_stats_enqueue(cfs_rq, se); __enqueue_entity(cfs_rq, se); } static void -dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - int sleep, u64 now) +dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) { - update_stats_dequeue(cfs_rq, se, now); + update_stats_dequeue(cfs_rq, se); if (sleep) { se->sleep_start_fair = cfs_rq->fair_clock; #ifdef CONFIG_SCHEDSTATS @@ -600,9 +600,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, struct task_struct *tsk = task_of(se); if (tsk->state & TASK_INTERRUPTIBLE) - se->sleep_start = now; + se->sleep_start = rq_of(cfs_rq)->clock; if (tsk->state & TASK_UNINTERRUPTIBLE) - se->block_start = now; + se->block_start = rq_of(cfs_rq)->clock; } cfs_rq->wait_runtime -= se->wait_runtime; #endif @@ -629,7 +629,7 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, } static inline void -set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) +set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { /* * Any task has to be enqueued before it get to execute on @@ -638,49 +638,46 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) * done a put_prev_task_fair() shortly before this, which * updated rq->fair_clock - used by update_stats_wait_end()) */ - update_stats_wait_end(cfs_rq, se, now); - update_stats_curr_start(cfs_rq, se, now); + update_stats_wait_end(cfs_rq, se); + update_stats_curr_start(cfs_rq, se); set_cfs_rq_curr(cfs_rq, se); } -static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now) +static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { struct sched_entity *se = __pick_next_entity(cfs_rq); - set_next_entity(cfs_rq, se, now); + set_next_entity(cfs_rq, se); return se; } -static void -put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now) +static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) { /* * If still on the runqueue then deactivate_task() * was not called and update_curr() has to be done: */ if (prev->on_rq) - update_curr(cfs_rq, now); + update_curr(cfs_rq); - update_stats_curr_end(cfs_rq, prev, now); + update_stats_curr_end(cfs_rq, prev); if (prev->on_rq) - update_stats_wait_start(cfs_rq, prev, now); + update_stats_wait_start(cfs_rq, prev); set_cfs_rq_curr(cfs_rq, NULL); } static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) { - struct rq *rq = rq_of(cfs_rq); struct sched_entity *next; - u64 now = __rq_clock(rq); /* * Dequeue and enqueue the task to update its * position within the tree: */ - 
dequeue_entity(cfs_rq, curr, 0, now); - enqueue_entity(cfs_rq, curr, 0, now); + dequeue_entity(cfs_rq, curr, 0); + enqueue_entity(cfs_rq, curr, 0); /* * Reschedule if another task tops the current one. @@ -785,8 +782,7 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p) * increased. Here we update the fair scheduling stats and * then put the task into the rbtree: */ -static void -enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; @@ -795,7 +791,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) if (se->on_rq) break; cfs_rq = cfs_rq_of(se); - enqueue_entity(cfs_rq, se, wakeup, now); + enqueue_entity(cfs_rq, se, wakeup); } } @@ -804,15 +800,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) * decreased. We remove the task from the rbtree and * update the fair scheduling stats: */ -static void -dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - dequeue_entity(cfs_rq, se, sleep, now); + dequeue_entity(cfs_rq, se, sleep); /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) break; @@ -825,14 +820,14 @@ dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now) static void yield_task_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); - u64 now = __rq_clock(rq); + __update_rq_clock(rq); /* * Dequeue and enqueue the task to update its * position within the tree: */ - dequeue_entity(cfs_rq, &p->se, 0, now); - enqueue_entity(cfs_rq, &p->se, 0, now); + dequeue_entity(cfs_rq, &p->se, 0); + enqueue_entity(cfs_rq, &p->se, 0); } /* @@ -845,7 +840,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) unsigned long gran; if (unlikely(rt_prio(p->prio))) { - update_curr(cfs_rq, rq_clock(rq)); + update_rq_clock(rq); + update_curr(cfs_rq); resched_task(curr); return; } @@ -861,7 +857,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran); } -static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_fair(struct rq *rq) { struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; @@ -870,7 +866,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) return NULL; do { - se = pick_next_entity(cfs_rq, now); + se = pick_next_entity(cfs_rq); cfs_rq = group_cfs_rq(se); } while (cfs_rq); @@ -880,14 +876,14 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) /* * Account for a descheduled task: */ -static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now) +static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) { struct sched_entity *se = &prev->se; struct cfs_rq *cfs_rq; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - put_prev_entity(cfs_rq, se, now); + put_prev_entity(cfs_rq, se); } } @@ -930,6 +926,7 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); } +#ifdef CONFIG_FAIR_GROUP_SCHED static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) { 
 	struct sched_entity *curr;
@@ -943,12 +940,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 	return p->prio;
 }
+#endif
-static int
+static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-			unsigned long max_nr_move, unsigned long max_load_move,
-			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *total_load_moved)
+		  unsigned long max_nr_move, unsigned long max_load_move,
+		  struct sched_domain *sd, enum cpu_idle_type idle,
+		  int *all_pinned, int *this_best_prio)
 {
 	struct cfs_rq *busy_cfs_rq;
 	unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -959,15 +957,14 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	cfs_rq_iterator.next = load_balance_next_fair;
 	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
 		struct cfs_rq *this_cfs_rq;
 		long imbalance;
 		unsigned long maxload;
-		int this_best_prio, best_prio, best_prio_seen = 0;
 		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
-		imbalance = busy_cfs_rq->load.weight -
-					this_cfs_rq->load.weight;
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
 		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
 		if (imbalance <= 0)
 			continue;
@@ -976,27 +973,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		imbalance /= 2;
 		maxload = min(rem_load_move, imbalance);
-		this_best_prio = cfs_rq_best_prio(this_cfs_rq);
-		best_prio = cfs_rq_best_prio(busy_cfs_rq);
-
-		/*
-		 * Enable handling of the case where there is more than one task
-		 * with the best priority. If the current running task is one
-		 * of those with prio==best_prio we know it won't be moved
-		 * and therefore it's safe to override the skip (based on load)
-		 * of any task we find with that prio.
-		 */
-		if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se)
-			best_prio_seen = 1;
-
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
 		/* pass busy_cfs_rq argument into
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
 		nr_moved = balance_tasks(this_rq, this_cpu, busiest,
 				max_nr_move, maxload, sd, idle, all_pinned,
-				&load_moved, this_best_prio, best_prio,
-				best_prio_seen, &cfs_rq_iterator);
+				&load_moved, this_best_prio, &cfs_rq_iterator);
 		total_nr_moved += nr_moved;
 		max_nr_move -= nr_moved;
@@ -1006,9 +993,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			break;
 	}
-	*total_load_moved = max_load_move - rem_load_move;
-
-	return total_nr_moved;
+	return max_load_move - rem_load_move;
 }
 /*
@@ -1032,14 +1017,14 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
  * monopolize the CPU. Note: the parent runqueue is locked,
  * the child is not running yet.
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
+static void task_new_fair(struct rq *rq, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 	struct sched_entity *se = &p->se;
 	sched_info_queued(p);
-	update_stats_enqueue(cfs_rq, se, now);
+	update_stats_enqueue(cfs_rq, se);
 	/*
 	 * Child runs first: we let it run before the parent
 	 * until it reschedules once. We set up the key so that
@@ -1072,15 +1057,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
  */
 static void set_curr_task_fair(struct rq *rq)
 {
-	struct task_struct *curr = rq->curr;
-	struct sched_entity *se = &curr->se;
-	u64 now = rq_clock(rq);
-	struct cfs_rq *cfs_rq;
+	struct sched_entity *se = &rq->curr.se;
-	for_each_sched_entity(se) {
-		cfs_rq = cfs_rq_of(se);
-		set_next_entity(cfs_rq, se, now);
-	}
+	for_each_sched_entity(se)
+		set_next_entity(cfs_rq_of(se), se);
 }
 #else
 static void set_curr_task_fair(struct rq *rq)
@@ -1109,12 +1089,11 @@ struct sched_class fair_sched_class __read_mostly = {
 };
 #ifdef CONFIG_SCHED_DEBUG
-void print_cfs_stats(struct seq_file *m, int cpu, u64 now)
+static void print_cfs_stats(struct seq_file *m, int cpu)
 {
-	struct rq *rq = cpu_rq(cpu);
 	struct cfs_rq *cfs_rq;
-	for_each_leaf_cfs_rq(rq, cfs_rq)
-		print_cfs_rq(m, cpu, cfs_rq, now);
+	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+		print_cfs_rq(m, cpu, cfs_rq);
 }
 #endif
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 41841e741c4..3503fb2d9f9 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
 	resched_task(rq->idle);
 }
-static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	schedstat_inc(rq, sched_goidle);
@@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
  * message if some code attempts to do it:
  */
 static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
 {
 	spin_unlock_irq(&rq->lock);
 	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
@@ -33,15 +33,15 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
 	spin_lock_irq(&rq->lock);
 }
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
 }
-static int
+static unsigned long
 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *total_load_moved)
+			int *all_pinned, int *this_best_prio)
 {
 	return 0;
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 002fcf8d3f6..dcdcad632fd 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -7,7 +7,7 @@
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
  */
-static inline void update_curr_rt(struct rq *rq, u64 now)
+static inline void update_curr_rt(struct rq *rq)
 {
 	struct task_struct *curr = rq->curr;
 	u64 delta_exec;
@@ -15,18 +15,17 @@ static inline void update_curr_rt(struct rq *rq, u64 now)
 	if (!task_has_rt_policy(curr))
 		return;
-	delta_exec = now - curr->se.exec_start;
+	delta_exec = rq->clock - curr->se.exec_start;
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 	curr->se.sum_exec_runtime += delta_exec;
-	curr->se.exec_start = now;
+	curr->se.exec_start = rq->clock;
 }
-static void
-enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct rt_prio_array *array = &rq->rt.active;
@@ -37,12 +36,11 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
 /*
  * Adding/removing a task to/from a priority array:
  */
-static void
-dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct rt_prio_array *array = &rq->rt.active;
-	update_curr_rt(rq, now);
+	update_curr_rt(rq);
 	list_del(&p->run_list);
 	if (list_empty(array->queue + p->prio))
@@ -75,7 +73,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 		resched_task(rq->curr);
 }
-static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -89,14 +87,14 @@ static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
 	queue = array->queue + idx;
 	next = list_entry(queue->next, struct task_struct, run_list);
-	next->se.exec_start = now;
+	next->se.exec_start = rq->clock;
 	return next;
 }
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
-	update_curr_rt(rq, now);
+	update_curr_rt(rq);
 	p->se.exec_start = 0;
 }
@@ -172,28 +170,15 @@ static struct task_struct *load_balance_next_rt(void *arg)
 	return p;
 }
-static int
+static unsigned long
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *load_moved)
+			int *all_pinned, int *this_best_prio)
 {
-	int this_best_prio, best_prio, best_prio_seen = 0;
 	int nr_moved;
 	struct rq_iterator rt_rq_iterator;
-
-	best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
-	this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
-
-	/*
-	 * Enable handling of the case where there is more than one task
-	 * with the best priority. If the current running task is one
-	 * of those with prio==best_prio we know it won't be moved
-	 * and therefore it's safe to override the skip (based on load)
-	 * of any task we find with that prio.
-	 */
-	if (busiest->curr->prio == best_prio)
-		best_prio_seen = 1;
+	unsigned long load_moved;
 	rt_rq_iterator.start = load_balance_start_rt;
 	rt_rq_iterator.next = load_balance_next_rt;
@@ -203,11 +188,10 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	rt_rq_iterator.arg = busiest;
 	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, load_moved,
-			this_best_prio, best_prio, best_prio_seen,
-			&rt_rq_iterator);
+			max_load_move, sd, idle, all_pinned, &load_moved,
+			this_best_prio, &rt_rq_iterator);
-	return nr_moved;
+	return load_moved;
 }
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
diff --git a/kernel/signal.c b/kernel/signal.c
index ef8156a6aad..ad63109e413 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -378,7 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 	/* We only dequeue private signals from ourselves, we don't let
 	 * signalfd steal them
 	 */
-	if (tsk == current)
+	if (likely(tsk == current))
 		signr = __dequeue_signal(&tsk->pending, mask, info);
 	if (!signr) {
 		signr = __dequeue_signal(&tsk->signal->shared_pending,
@@ -425,7 +425,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 		if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
 			tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
 	}
-	if ( signr &&
+	if (signr && likely(tsk == current) &&
 	     ((info->si_code & __SI_MASK) == __SI_TIMER) &&
 	     info->si_sys_private){
 		/*
@@ -1561,10 +1561,6 @@ static inline int may_ptrace_stop(void)
 		     (current->ptrace & PT_ATTACHED)))
 		return 0;
-	if (unlikely(current->signal == current->parent->signal) &&
-	    unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))
-		return 0;
-
 	/*
 	 * Are we in the middle of do_coredump?
 	 * If so and our tracer is also part of the coredump stopping
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79c891e6266..9029690f4fa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -27,7 +27,6 @@
 #include <linux/capability.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
-#include <linux/capability.h>
 #include <linux/smp_lock.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -1023,6 +1022,7 @@ static ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
+#endif
 #ifdef CONFIG_NUMA
 	{
 		.ctl_name	= CTL_UNNUMBERED,
@@ -1034,7 +1034,6 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_string,
 	},
 #endif
-#endif
 #if defined(CONFIG_X86_32) || \
    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
 	{
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 2ad1c37b8df..41dd3105ce7 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb)
 	return ret;
 }
-/**
- * clockevents_unregister_notifier - unregister a clock events change listener
- */
-void clockevents_unregister_notifier(struct notifier_block *nb)
-{
-	spin_lock(&clockevents_lock);
-	raw_notifier_chain_unregister(&clockevents_chain, nb);
-	spin_unlock(&clockevents_lock);
-}
-
 /*
  * Notify about a clock event change. Called with clockevents_lock
  * held.
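
The change that repeats across the sched_fair.c, sched_rt.c and sched_idletask.c hunks above is the removal of the 'u64 now' argument from the scheduler-class hooks: the caller refreshes the per-runqueue timestamp once via update_rq_clock() (or __update_rq_clock() where interrupts are already off), and each hook reads rq->clock itself, as update_curr_rt() and yield_task_fair() do after this patch. Below is a minimal, self-contained sketch of that calling convention only; the struct layout, the sched_clock_stub() helper and the function bodies are simplified stand-ins invented for illustration, not the kernel's implementation.

/* Illustrative sketch only -- simplified stand-ins for the kernel types. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct rq {
	uint64_t clock;		/* per-runqueue timestamp, like rq->clock */
	uint64_t exec_start;	/* when the current task started running  */
	uint64_t sum_exec;	/* accumulated runtime                    */
};

/* Stand-in for the clock source the kernel samples. */
static uint64_t sched_clock_stub(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Caller-side step: refresh rq->clock once, before invoking any hooks. */
static void update_rq_clock(struct rq *rq)
{
	rq->clock = sched_clock_stub();
}

/* Hook-side step: no 'u64 now' argument; read rq->clock instead. */
static void update_curr(struct rq *rq)
{
	uint64_t delta = rq->clock - rq->exec_start;

	if ((int64_t)delta < 0)		/* guard against a backwards clock */
		delta = 0;
	rq->sum_exec += delta;
	rq->exec_start = rq->clock;
}

int main(void)
{
	struct rq rq = { 0 };

	update_rq_clock(&rq);
	rq.exec_start = rq.clock;

	update_rq_clock(&rq);		/* e.g. at the next tick */
	update_curr(&rq);
	printf("accumulated %llu ns\n", (unsigned long long)rq.sum_exec);
	return 0;
}

The apparent benefit of this convention is that every hook invoked for the same scheduling operation sees one consistent clock value, instead of each call site threading its own 'now' down the call chain.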