diff options
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r-- | kernel/hrtimer.c | 342 |
1 files changed, 293 insertions, 49 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b8e4dce80a7..47e63349d1b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) if (!base->first) continue; timer = rb_entry(base->first, struct hrtimer, node); - expires = ktime_sub(timer->expires, base->offset); + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires.tv64 < cpu_base->expires_next.tv64) cpu_base->expires_next = expires; } @@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next; - ktime_t expires = ktime_sub(timer->expires, base->offset); + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); int res; - WARN_ON_ONCE(timer->expires.tv64 < 0); + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); /* * When the callback is running, we do not reprogram the clock event @@ -664,25 +664,17 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* Timer is expired, act upon the callback mode */ switch(timer->cb_mode) { - case HRTIMER_CB_IRQSAFE_NO_RESTART: - debug_hrtimer_deactivate(timer); - /* - * We can call the callback from here. No restart - * happens, so no danger of recursion - */ - BUG_ON(timer->function(timer) != HRTIMER_NORESTART); - return 1; - case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: + case HRTIMER_CB_IRQSAFE_PERCPU: + case HRTIMER_CB_IRQSAFE_UNLOCKED: /* * This is solely for the sched tick emulation with * dynamic tick support to ensure that we do not * restart the tick right on the edge and end up with * the tick timer in the softirq ! The calling site - * takes care of this. + * takes care of this. Also used for hrtimer sleeper ! */ debug_hrtimer_deactivate(timer); return 1; - case HRTIMER_CB_IRQSAFE: case HRTIMER_CB_SOFTIRQ: /* * Move everything else into the softirq pending list ! @@ -794,7 +786,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) u64 orun = 1; ktime_t delta; - delta = ktime_sub(now, timer->expires); + delta = ktime_sub(now, hrtimer_get_expires(timer)); if (delta.tv64 < 0) return 0; @@ -806,8 +798,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) s64 incr = ktime_to_ns(interval); orun = ktime_divns(delta, incr); - timer->expires = ktime_add_ns(timer->expires, incr * orun); - if (timer->expires.tv64 > now.tv64) + hrtimer_add_expires_ns(timer, incr * orun); + if (hrtimer_get_expires_tv64(timer) > now.tv64) return orun; /* * This (and the ktime_add() below) is the @@ -815,7 +807,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) */ orun++; } - timer->expires = ktime_add_safe(timer->expires, interval); + hrtimer_add_expires(timer, interval); return orun; } @@ -847,7 +839,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, * We dont care about collisions. Nodes with * the same expiry time stay together. */ - if (timer->expires.tv64 < entry->expires.tv64) { + if (hrtimer_get_expires_tv64(timer) < + hrtimer_get_expires_tv64(entry)) { link = &(*link)->rb_left; } else { link = &(*link)->rb_right; @@ -944,9 +937,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) } /** - * hrtimer_start - (re)start an relative timer on the current CPU + * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU * @timer: the timer to be added * @tim: expiry time + * @delta_ns: "slack" range for the timer * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) * * Returns: @@ -954,7 +948,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * 1 when the timer was active */ int -hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, + const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; @@ -982,7 +977,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #endif } - timer->expires = tim; + hrtimer_set_expires_range_ns(timer, tim, delta_ns); timer_stats_hrtimer_set_start_info(timer); @@ -1015,8 +1010,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); + +/** + * hrtimer_start - (re)start an hrtimer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + return hrtimer_start_range_ns(timer, tim, 0, mode); +} EXPORT_SYMBOL_GPL(hrtimer_start); + /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop @@ -1076,7 +1089,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) ktime_t rem; base = lock_hrtimer_base(timer, &flags); - rem = ktime_sub(timer->expires, base->get_time()); + rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); return rem; @@ -1108,7 +1121,7 @@ ktime_t hrtimer_get_next_event(void) continue; timer = rb_entry(base->first, struct hrtimer, node); - delta.tv64 = timer->expires.tv64; + delta.tv64 = hrtimer_get_expires_tv64(timer); delta = ktime_sub(delta, base->get_time()); if (delta.tv64 < mindelta.tv64) mindelta.tv64 = delta.tv64; @@ -1187,6 +1200,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) enum hrtimer_restart (*fn)(struct hrtimer *); struct hrtimer *timer; int restart; + int emulate_hardirq_ctx = 0; timer = list_entry(cpu_base->cb_pending.next, struct hrtimer, cb_entry); @@ -1195,10 +1209,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) timer_stats_account_hrtimer(timer); fn = timer->function; + /* + * A timer might have been added to the cb_pending list + * when it was migrated during a cpu-offline operation. + * Emulate hardirq context for such timers. + */ + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || + timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) + emulate_hardirq_ctx = 1; + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); spin_unlock_irq(&cpu_base->lock); - restart = fn(timer); + if (unlikely(emulate_hardirq_ctx)) { + local_irq_disable(); + restart = fn(timer); + local_irq_enable(); + } else + restart = fn(timer); spin_lock_irq(&cpu_base->lock); @@ -1245,7 +1273,8 @@ static void __run_hrtimer(struct hrtimer *timer) timer_stats_account_hrtimer(timer); fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || + timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { /* * Used for scheduler timers, avoid lock inversion with * rq->lock and tasklist_lock. @@ -1308,10 +1337,23 @@ void hrtimer_interrupt(struct clock_event_device *dev) timer = rb_entry(node, struct hrtimer, node); - if (basenow.tv64 < timer->expires.tv64) { + /* + * The immediate goal for using the softexpires is + * minimizing wakeups, not running timers at the + * earliest interrupt after their soft expiration. + * This allows us to avoid using a Priority Search + * Tree, which can answer a stabbing querry for + * overlapping intervals and instead use the simple + * BST we already have. + * We don't add extra wakeups by delaying timers that + * are right-of a not yet expired timer, because that + * timer will have to trigger a wakeup anyway. + */ + + if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { ktime_t expires; - expires = ktime_sub(timer->expires, + expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires.tv64 < expires_next.tv64) expires_next = expires; @@ -1347,6 +1389,30 @@ void hrtimer_interrupt(struct clock_event_device *dev) raise_softirq(HRTIMER_SOFTIRQ); } +/** + * hrtimer_peek_ahead_timers -- run soft-expired timers now + * + * hrtimer_peek_ahead_timers will peek at the timer queue of + * the current cpu and check if there are any timers for which + * the soft expires time has passed. If any such timers exist, + * they are run immediately and then removed from the timer queue. + * + */ +void hrtimer_peek_ahead_timers(void) +{ + struct tick_device *td; + unsigned long flags; + + if (!hrtimer_hres_active()) + return; + + local_irq_save(flags); + td = &__get_cpu_var(tick_cpu_device); + if (td && td->evtdev) + hrtimer_interrupt(td->evtdev); + local_irq_restore(flags); +} + static void run_hrtimer_softirq(struct softirq_action *h) { run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); @@ -1401,9 +1467,7 @@ void hrtimer_run_queues(void) if (!base->first) continue; - if (base->get_softirq_time) - base->softirq_time = base->get_softirq_time(); - else if (gettime) { + if (gettime) { hrtimer_get_softirq_time(cpu_base); gettime = 0; } @@ -1414,7 +1478,8 @@ void hrtimer_run_queues(void) struct hrtimer *timer; timer = rb_entry(node, struct hrtimer, node); - if (base->softirq_time.tv64 <= timer->expires.tv64) + if (base->softirq_time.tv64 <= + hrtimer_get_expires_tv64(timer)) break; if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { @@ -1452,7 +1517,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) sl->timer.function = hrtimer_wakeup; sl->task = task; #ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; #endif } @@ -1462,7 +1527,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod do { set_current_state(TASK_INTERRUPTIBLE); - hrtimer_start(&t->timer, t->timer.expires, mode); + hrtimer_start_expires(&t->timer, mode); if (!hrtimer_active(&t->timer)) t->task = NULL; @@ -1484,7 +1549,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) struct timespec rmt; ktime_t rem; - rem = ktime_sub(timer->expires, timer->base->get_time()); + rem = hrtimer_expires_remaining(timer); if (rem.tv64 <= 0) return 0; rmt = ktime_to_timespec(rem); @@ -1503,7 +1568,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, HRTIMER_MODE_ABS); - t.timer.expires.tv64 = restart->nanosleep.expires; + hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); if (do_nanosleep(&t, HRTIMER_MODE_ABS)) goto out; @@ -1528,9 +1593,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, struct restart_block *restart; struct hrtimer_sleeper t; int ret = 0; + unsigned long slack; + + slack = current->timer_slack_ns; + if (rt_task(current)) + slack = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - t.timer.expires = timespec_to_ktime(*rqtp); + hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); if (do_nanosleep(&t, mode)) goto out; @@ -1550,7 +1620,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.index = t.timer.base->index; restart->nanosleep.rmtp = rmtp; - restart->nanosleep.expires = t.timer.expires.tv64; + restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); ret = -ERESTART_RESTARTBLOCK; out: @@ -1591,49 +1661,123 @@ static void __cpuinit init_hrtimers_cpu(int cpu) #ifdef CONFIG_HOTPLUG_CPU -static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base) +static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, + struct hrtimer_clock_base *new_base, int dcpu) { struct hrtimer *timer; struct rb_node *node; + int raise = 0; while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_hrtimer_deactivate(timer); - __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); + + /* + * Should not happen. Per CPU timers should be + * canceled _before_ the migration code is called + */ + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { + __remove_hrtimer(timer, old_base, + HRTIMER_STATE_INACTIVE, 0); + WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", + timer, timer->function, dcpu); + continue; + } + + /* + * Mark it as STATE_MIGRATE not INACTIVE otherwise the + * timer could be seen as !active and just vanish away + * under us on another CPU + */ + __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); timer->base = new_base; /* * Enqueue the timer. Allow reprogramming of the event device */ enqueue_hrtimer(timer, new_base, 1); + +#ifdef CONFIG_HIGH_RES_TIMERS + /* + * Happens with high res enabled when the timer was + * already expired and the callback mode is + * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The + * enqueue code does not move them to the soft irq + * pending list for performance/latency reasons, but + * in the migration state, we need to do that + * otherwise we end up with a stale timer. + */ + if (timer->state == HRTIMER_STATE_MIGRATE) { + timer->state = HRTIMER_STATE_PENDING; + list_add_tail(&timer->cb_entry, + &new_base->cpu_base->cb_pending); + raise = 1; + } +#endif + /* Clear the migration state bit */ + timer->state &= ~HRTIMER_STATE_MIGRATE; + } + return raise; +} + +#ifdef CONFIG_HIGH_RES_TIMERS +static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, + struct hrtimer_cpu_base *new_base) +{ + struct hrtimer *timer; + int raise = 0; + + while (!list_empty(&old_base->cb_pending)) { + timer = list_entry(old_base->cb_pending.next, + struct hrtimer, cb_entry); + + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0); + timer->base = &new_base->clock_base[timer->base->index]; + list_add_tail(&timer->cb_entry, &new_base->cb_pending); + raise = 1; } + return raise; +} +#else +static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, + struct hrtimer_cpu_base *new_base) +{ + return 0; } +#endif static void migrate_hrtimers(int cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i; + int i, raise = 0; BUG_ON(cpu_online(cpu)); old_base = &per_cpu(hrtimer_bases, cpu); new_base = &get_cpu_var(hrtimer_bases); tick_cancel_sched_timer(cpu); - - local_irq_disable(); - spin_lock(&new_base->lock); + /* + * The caller is globally serialized and nobody else + * takes two locks at once, deadlock is not possible. + */ + spin_lock_irq(&new_base->lock); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i]); + if (migrate_hrtimer_list(&old_base->clock_base[i], + &new_base->clock_base[i], cpu)) + raise = 1; } + if (migrate_hrtimer_pending(old_base, new_base)) + raise = 1; + spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); - local_irq_enable(); + spin_unlock_irq(&new_base->lock); put_cpu_var(hrtimer_bases); + + if (raise) + hrtimer_raise_softirq(); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -1678,3 +1822,103 @@ void __init hrtimers_init(void) #endif } +/** + * schedule_hrtimeout_range - sleep until timeout + * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel give the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, + const enum hrtimer_mode mode) +{ + struct hrtimer_sleeper t; + + /* + * Optimize when a zero timeout value is given. It does not + * matter whether this is an absolute or a relative time. + */ + if (expires && !expires->tv64) { + __set_current_state(TASK_RUNNING); + return 0; + } + + /* + * A NULL parameter means "inifinte" + */ + if (!expires) { + schedule(); + __set_current_state(TASK_RUNNING); + return -EINTR; + } + + hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); + + hrtimer_init_sleeper(&t, current); + + hrtimer_start_expires(&t.timer, mode); + if (!hrtimer_active(&t.timer)) + t.task = NULL; + + if (likely(t.task)) + schedule(); + + hrtimer_cancel(&t.timer); + destroy_hrtimer_on_stack(&t.timer); + + __set_current_state(TASK_RUNNING); + + return !t.task ? 0 : -EINTR; +} +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); + +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + return schedule_hrtimeout_range(expires, 0, mode); +} +EXPORT_SYMBOL_GPL(schedule_hrtimeout); |