From 85a793533524f333e8d630dc22450e574b7e08d2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Mar 2008 20:06:24 -0700 Subject: [SPARC64]: Make save_stack_trace() more efficient. Doing a 'flushw' every stack trace capture creates so much overhead that it makes lockdep next to unusable. We only care about the frame pointer chain and the function caller program counters, so flush those by hand to the stack frame. This is significantly more efficient than a 'flushw' because: 1) We only save 16 bytes per active register window to the stack. 2) This doesn't push the entire register window context of the current call chain out of the cpu, forcing register window fill traps as we return back down. Note that we can't use 'restore' and 'save' instructions to move around the register windows because that wouldn't work on Niagara processors. They optimize 'save' into a new register window by simply clearing out the registers instead of pulling them in from the on-chip register window backing store. Based upon a report by Tom Callaway. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 30 ++++++++++++++++++++++++++++++ arch/sparc64/kernel/stacktrace.c | 4 +++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/sparc64/kernel') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 6be4d2d2904..49eca4b1cf2 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1705,6 +1705,36 @@ __flushw_user: 2: retl nop + /* Flush %fp and %i7 to the stack for all register + * windows active inside of the cpu. This allows + * show_stack_trace() to avoid using an expensive + * 'flushw'. + */ + .globl stack_trace_flush + .type stack_trace_flush,#function +stack_trace_flush: + rdpr %pstate, %o0 + wrpr %o0, PSTATE_IE, %pstate + + rdpr %cwp, %g1 + rdpr %canrestore, %g2 + sub %g1, 1, %g3 + +1: brz,pn %g2, 2f + sub %g2, 1, %g2 + wrpr %g3, %cwp + stx %fp, [%sp + STACK_BIAS + RW_V9_I6] + stx %i7, [%sp + STACK_BIAS + RW_V9_I7] + ba,pt %xcc, 1b + sub %g3, 1, %g3 + +2: wrpr %g1, %cwp + wrpr %o0, %pstate + + retl + nop + .size stack_trace_flush,.-stack_trace_flush + #ifdef CONFIG_SMP .globl hard_smp_processor_id hard_smp_processor_id: diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c index 47f92a59be1..84d39e873e8 100644 --- a/arch/sparc64/kernel/stacktrace.c +++ b/arch/sparc64/kernel/stacktrace.c @@ -2,13 +2,15 @@ #include #include #include +#include void save_stack_trace(struct stack_trace *trace) { unsigned long ksp, fp, thread_base; struct thread_info *tp = task_thread_info(current); - flushw_all(); + stack_trace_flush(); + __asm__ __volatile__( "mov %%fp, %0" : "=r" (ksp) -- cgit v1.2.3