From ee5f80a993539490a07477ff2526bf62c503fbb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Nov 2008 11:06:00 +0100 Subject: irq: call __irq_enter() before calling the tick_idle_check Impact: avoid spurious ksoftirqd wakeups The tick idle check which is called from irq_enter() was run before the call to __irq_enter() which did not set the in_interrupt() bits in preempt_count. That way the raise of a softirq woke up softirqd for nothing as the softirq was handled on return from interrupt. Call __irq_enter() before calling into the tick idle check code. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/softirq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index 7110daeb9a9..e7c69a720d6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -269,10 +269,11 @@ void irq_enter(void) { int cpu = smp_processor_id(); - if (idle_cpu(cpu) && !in_interrupt()) + if (idle_cpu(cpu) && !in_interrupt()) { + __irq_enter(); tick_check_idle(cpu); - - __irq_enter(); + } else + __irq_enter(); } #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED -- cgit v1.2.3 From ae99286b4f1be7788f2d6947c66a91dbd6351eec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Nov 2008 13:20:23 +0100 Subject: nohz: disable tick_nohz_kick_tick() for now Impact: nohz powersavings and wakeup regression commit fb02fbc14d17837b4b7b02dbb36142c16a7bf208 (NOHZ: restart tick device from irq_enter()) causes a serious wakeup regression. While the patch is correct it does not take into account that spurious wakeups happen on x86. A fix for this issue is available, but we just revert to the .27 behaviour and let long running softirqs screw themself. Disable it for now. Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5bbb1044f84..342fc9ccab4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void) */ static void tick_nohz_kick_tick(int cpu) { +#if 0 + /* Switch back to 2.6.27 behaviour */ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t delta, now; @@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu) return; tick_nohz_restart(ts, now); +#endif } #else -- cgit v1.2.3 From 5d5254f0d3b9bebc47d97e357374c0ad0c291a7d Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Sat, 25 Oct 2008 10:22:38 +0530 Subject: timers: handle HRTIMER_CB_IRQSAFE_UNLOCKED correctly from softirq context Impact: fix incorrect locking triggered during hotplug-intense stress-tests While migrating the the CB_IRQSAFE_UNLOCKED timers during a cpu-offline, we queue them on the cb_pending list, so that they won't go stale. Thus, when the callbacks of the timers run from the softirq context, they could run into potential deadlocks, since these callbacks assume that they're running with irq's disabled, thereby annoying lockdep! Fix this by emulating hardirq context while running these callbacks from the hrtimer softirq. ================================= [ INFO: inconsistent lock state ] 2.6.27 #2 -------------------------------- inconsistent {in-hardirq-W} -> {hardirq-on-W} usage. ksoftirqd/0/4 [HC0[0]:SC1[1]:HE1:SE0] takes: (&rq->lock){++..}, at: [] sched_rt_period_timer+0x9e/0x1fc {in-hardirq-W} state was registered at: [] __lock_acquire+0x549/0x121e [] native_sched_clock+0x88/0x99 [] clocksource_get_next+0x39/0x3f [] update_wall_time+0x616/0x7df [] lock_acquire+0x5a/0x74 [] scheduler_tick+0x3a/0x18d [] _spin_lock+0x1c/0x45 [] scheduler_tick+0x3a/0x18d [] scheduler_tick+0x3a/0x18d [] update_process_times+0x3a/0x44 [] tick_periodic+0x63/0x6d [] tick_handle_periodic+0x14/0x5e [] timer_interrupt+0x44/0x4a [] handle_IRQ_event+0x13/0x3d [] handle_level_irq+0x79/0xbd [] do_IRQ+0x69/0x7d [] common_interrupt+0x28/0x30 [] aac_probe_one+0x1a3/0x3f3 [] _spin_unlock_irqrestore+0x36/0x39 [] setup_irq+0x1be/0x1f9 [] start_kernel+0x259/0x2c5 [] 0xffffffff irq event stamp: 50102 hardirqs last enabled at (50102): [] _spin_unlock_irq+0x20/0x23 hardirqs last disabled at (50101): [] _spin_lock_irq+0xa/0x4b softirqs last enabled at (50088): [] do_softirq+0x37/0x4d softirqs last disabled at (50099): [] do_softirq+0x37/0x4d other info that might help us debug this: no locks held by ksoftirqd/0/4. stack backtrace: Pid: 4, comm: ksoftirqd/0 Not tainted 2.6.27 #2 [] print_usage_bug+0x13e/0x147 [] mark_lock+0x493/0x797 [] __lock_acquire+0x5be/0x121e [] lock_acquire+0x5a/0x74 [] sched_rt_period_timer+0x9e/0x1fc [] _spin_lock+0x1c/0x45 [] sched_rt_period_timer+0x9e/0x1fc [] sched_rt_period_timer+0x9e/0x1fc [] finish_task_switch+0x41/0xbd [] native_sched_clock+0x88/0x99 [] sched_rt_period_timer+0x0/0x1fc [] run_hrtimer_pending+0x54/0xe5 [] sched_rt_period_timer+0x0/0x1fc [] __do_softirq+0x7b/0xef [] do_softirq+0x37/0x4d [] ksoftirqd+0x56/0xc5 [] ksoftirqd+0x0/0xc5 [] kthread+0x38/0x5d [] kthread+0x0/0x5d [] kernel_thread_helper+0x7/0x10 ======================= Signed-off-by: Gautham R Shenoy Acked-by: Peter Zijlstra Acked-by: "Paul E. McKenney" Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2b465dfde42..95d3949f2ae 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1209,6 +1209,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) enum hrtimer_restart (*fn)(struct hrtimer *); struct hrtimer *timer; int restart; + int emulate_hardirq_ctx = 0; timer = list_entry(cpu_base->cb_pending.next, struct hrtimer, cb_entry); @@ -1217,10 +1218,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) timer_stats_account_hrtimer(timer); fn = timer->function; + /* + * A timer might have been added to the cb_pending list + * when it was migrated during a cpu-offline operation. + * Emulate hardirq context for such timers. + */ + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || + timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) + emulate_hardirq_ctx = 1; + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); spin_unlock_irq(&cpu_base->lock); - restart = fn(timer); + if (unlikely(emulate_hardirq_ctx)) { + local_irq_disable(); + restart = fn(timer); + local_irq_enable(); + } else + restart = fn(timer); spin_lock_irq(&cpu_base->lock); -- cgit v1.2.3