From c530665c31c0140b74ca7689e7f836177796e5bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Mar 2010 07:16:16 +0100 Subject: perf: Take a hot regs snapshot for trace events We are taking a wrong regs snapshot when a trace event triggers. Either we use get_irq_regs(), which gives us the interrupted registers if we are in an interrupt, or we use task_pt_regs() which gives us the state before we entered the kernel, assuming we are lucky enough to be no kernel thread, in which case task_pt_regs() returns the initial set of regs when the kernel thread was started. What we want is different. We need a hot snapshot of the regs, so that we can get the instruction pointer to record in the sample, the frame pointer for the callchain, and some other things. Let's use the new perf_fetch_caller_regs() for that. Comparison with perf record -e lock: -R -a -f -g Before: perf [kernel] [k] __do_softirq | --- __do_softirq | |--55.16%-- __open | --44.84%-- __write_nocancel After: perf [kernel] [k] perf_tp_event | --- perf_tp_event | |--41.07%-- lock_acquire | | | |--39.36%-- _raw_spin_lock | | | | | |--7.81%-- hrtimer_interrupt | | | smp_apic_timer_interrupt | | | apic_timer_interrupt The old case was producing unreliable callchains. Now having right frame and instruction pointers, we have the trace we want. Also syscalls and kprobe events already have the right regs, let's use them instead of wasting a retrieval. v2: Follow the rename perf_save_regs() -> perf_fetch_caller_regs() Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Jason Baron Cc: Archs --- kernel/perf_event.c | 8 ++------ kernel/trace/trace_event_profile.c | 3 ++- kernel/trace/trace_kprobe.c | 5 +++-- kernel/trace/trace_syscalls.c | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 359d7f690c2..45b4b6e5589 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4318,9 +4318,8 @@ static const struct pmu perf_ops_task_clock = { #ifdef CONFIG_EVENT_TRACING void perf_tp_event(int event_id, u64 addr, u64 count, void *record, - int entry_size) + int entry_size, struct pt_regs *regs) { - struct pt_regs *regs = get_irq_regs(); struct perf_sample_data data; struct perf_raw_record raw = { .size = entry_size, @@ -4330,12 +4329,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, perf_sample_data_init(&data, addr); data.raw = &raw; - if (!regs) - regs = task_pt_regs(current); - /* Trace events already protected against recursion */ do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, - &data, regs); + &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index f0d69300507..e66d21e15a0 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -2,13 +2,14 @@ * trace event based perf counter profiling * * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra - * + * Copyright (C) 2009-2010 Frederic Weisbecker */ #include #include #include "trace.h" +DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); static char *perf_trace_buf; static char *perf_trace_buf_nmi; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 505c92273b1..f7a20a8bfb3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1240,7 +1240,7 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); } /* Kretprobe profile handler */ @@ -1271,7 +1271,8 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, + irq_flags, regs); } static int probe_profile_enable(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cba47d7935c..7e6e84fb7b6 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -467,7 +467,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs); } int prof_sysenter_enable(struct ftrace_event_call *call) @@ -542,7 +542,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs); } int prof_sysexit_enable(struct ftrace_event_call *call) -- cgit v1.2.3