From 7515bf59f87f19b2a17972b74230d2f91756fe3c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 25 Aug 2009 14:31:11 +0200 Subject: tracing: Add syscall tracepoints - s390 arch update This patch includes s390 arch updates to synchronize with latest core changes in the syscalls tracing area. - tracing: Map syscall name to number (syscall_name_to_nr()) - tracing: Call arch_init_ftrace_syscalls at boot - tracing: add support tracepoint ids (set_syscall_{enter,exit}_id()) Signed-off-by: Hendrik Brueckner Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Martin Schwidefsky Cc: Paul Mundt LKML-Reference: <20090825123111.GD4639@cetus.boeblingen.de.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ftrace.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 3e298e64f0d..57bdcb1e3cd 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -220,6 +220,29 @@ struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + for (i = 0; i < NR_syscalls; i++) + if (syscalls_metadata[i]) + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + static struct syscall_metadata *find_syscall_meta(unsigned long syscall) { struct syscall_metadata *start; @@ -237,24 +260,19 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) return NULL; } -void arch_init_ftrace_syscalls(void) +static int __init arch_init_ftrace_syscalls(void) { struct syscall_metadata *meta; int i; - static atomic_t refs; - - if (atomic_inc_return(&refs) != 1) - goto out; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, GFP_KERNEL); if (!syscalls_metadata) - goto out; + return -ENOMEM; for (i = 0; i < NR_syscalls; i++) { meta = find_syscall_meta((unsigned long)sys_call_table[i]); syscalls_metadata[i] = meta; } - return; -out: - atomic_dec(&refs); + return 0; } +arch_initcall(arch_init_ftrace_syscalls); #endif -- cgit v1.2.3 From cd0980fc8add25e8ab12fcf1051c0f20cbc7c0c0 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 25 Aug 2009 14:50:27 +0200 Subject: tracing: Check invalid syscall nr while tracing syscalls Most arch syscall_get_nr() implementations returns -1 if the syscall number is not valid. Accessing the bit field without a check might result in a kernel oops (at least I saw it on s390 for ftrace selftest). Before this change, this problem did not occur, because the invalid syscall number (-1) caused syscall_nr_to_meta() to return NULL. There are at least two scenarios where syscall_get_nr() can return -1: 1. For example, ptrace stores an invalid syscall number, and thus, tracing code resets it. (see do_syscall_trace_enter in arch/s390/kernel/ptrace.c) 2. The syscall_regfunc() (kernel/tracepoint.c) sets the TIF_SYSCALL_FTRACE (now: TIF_SYSCALL_TRACEPOINT) flag for all threads which include kernel threads. However, the ftrace selftest triggers a kernel oops when testing syscall trace points: - The kernel thread is started as ususal (do_fork()), - tracing code sets TIF_SYSCALL_FTRACE, - the ret_from_fork() function is triggered and starts ftrace_syscall_exit() with an invalid syscall number. To avoid these scenarios, I suggest to check the syscall_nr. For instance, the ftrace selftest fails for s390 (with config option CONFIG_FTRACE_SYSCALLS set) and produces the following kernel oops. Unable to handle kernel pointer dereference at virtual kernel address 2000000000 Oops: 0038 [#1] PREEMPT SMP Modules linked in: CPU: 0 Not tainted 2.6.31-rc6-next-20090819-dirty #18 Process kthreadd (pid: 818, task: 000000003ea207e8, ksp: 000000003e813eb8) Krnl PSW : 0704100180000000 00000000000ea54c (ftrace_syscall_exit+0x58/0xdc) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:1 PM:0 EA:3 Krnl GPRS: 0000000000000000 00000000000e0000 ffffffffffffffff 20000000008c2650 0000000000000007 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ffffffffffffffff 000000003e813d78 000000003e813f58 0000000000505ba8 000000003e813e18 000000003e813d78 Krnl Code: 00000000000ea540: e330d0000008 ag %r3,0(%r13) 00000000000ea546: a7480007 lhi %r4,7 00000000000ea54a: 1442 nr %r4,%r2 >00000000000ea54c: e31030000090 llgc %r1,0(%r3) 00000000000ea552: 5410d008 n %r1,8(%r13) 00000000000ea556: 8a104000 sra %r1,0(%r4) 00000000000ea55a: 5410d00c n %r1,12(%r13) 00000000000ea55e: 1211 ltr %r1,%r1 Call Trace: ([<0000000000000000>] 0x0) [<000000000001fa22>] do_syscall_trace_exit+0x132/0x18c [<000000000002d0c4>] sysc_return+0x0/0x8 [<000000000001c738>] kernel_thread_starter+0x0/0xc Last Breaking-Event-Address: [<00000000000ea51e>] ftrace_syscall_exit+0x2a/0xdc Signed-off-by: Hendrik Brueckner Acked-by: Heiko Carstens Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Martin Schwidefsky Cc: Paul Mundt LKML-Reference: <20090825125027.GE4639@cetus.boeblingen.de.ibm.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace_syscalls.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 85291c4de40..cb7f600cb02 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -227,6 +227,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_enter_syscalls)) return; @@ -257,6 +259,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_exit_syscalls)) return; -- cgit v1.2.3 From cc3b13c11c567c69a6356be98d0c03ff11541d5c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 25 Aug 2009 18:02:37 +0200 Subject: tracing: Don't trace kernel thread syscalls Kernel threads don't call syscalls using the sysenter/sysexit path. Instead they directly call the sys_* or do_* functions that implement the syscalls inside the kernel. The current syscall tracepoints only bind the sysenter/sysexit path, then it has no effect to trace the kernel thread calls to syscalls in that path. Setting the TIF_SYSCALL_TRACEPOINT flag is then useless for these. Actually there is only one case when a kernel thread can reach the usual syscall exit tracing path: when we create a kernel thread, the child comes to ret_from_fork and is the fork() return is then traced. But this information alone is useless, then we don't want to set the TIF flags for these threads. Kernel threads have task_struct->mm set to NULL. (Thanks to Heiko for that hint ;-) The idea is then to check the mm field in syscall_regfunc() and set the flag accordingly. Signed-off-by: Hendrik Brueckner Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Heiko Carstens Cc: Hendrik Brueckner LKML-Reference: <20090825160237.GG4639@cetus.boeblingen.de.ibm.com> Signed-off-by: Frederic Weisbecker --- kernel/tracepoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1a6a453b7ef..9489a0a9b1b 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -597,7 +597,9 @@ void syscall_regfunc(void) if (!sys_tracepoint_refcount) { read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { - set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); + /* Skip kernel threads. */ + if (t->mm) + set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT); } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); } -- cgit v1.2.3 From dd86dda24cc1dc70031a7d9250dc3c0c430a50e2 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 24 Aug 2009 17:40:14 -0400 Subject: tracing: Define NR_syscalls for x86 (32) Add a NR_syscalls #define for x86. This is used in the syscall events tracing code. Todo: make it dynamic like x86_64. NR_syscalls is the usual name used to determine the number of syscalls supported by the current arch. We want to unify the use of this number across archs that support the syscall tracing. This also prepare to move some of the arch code to core code in the syscall tracing area. Signed-off-by: Jason Baron Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anwin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: <0f33c0f96d198fccc3ddd9ff7f5334ff5cb42706.1251146513.git.jbaron@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/unistd_32.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 732a3070615..8deaada61bc 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -345,6 +345,8 @@ #ifdef __KERNEL__ +#define NR_syscalls 337 + #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT -- cgit v1.2.3 From a5a2f8e2acb991327952c45a13f5441fc09dffd6 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 26 Aug 2009 12:09:10 -0400 Subject: tracing: Define NR_syscalls for x86_64 Express the available number of syscalls in a standard way by defining NR_syscalls. The common way to define it is to place its definition in asm/unistd.h However, the number of syscalls is defined using __NR_syscall_max in x86-64 after building a dynamic header file "asm-offsets.h" The source file that generates this header, asm-offsets-64.c includes unistd.h, then if we want to express NR_syscalls from __NR_syscall_max in unistd.h only after generating the dynamic header file, we need a watchguard. If unistd.h is included from asm-offsets-64.c, then we are generating asm-offset.h which defines __NR_syscall_max. At this time, we don't want to (we can't) define NR_syscalls, then we do nothing. Otherwise we define NR_syscalls because we know asm-offsets.h has been generated. Signed-off-by: Jason Baron Acked-by: Steven Rostedt Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anwin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: <20090826160910.GB2658@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/unistd_64.h | 6 ++++++ arch/x86/kernel/asm-offsets_64.c | 1 + 2 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 900e1617e67..b9f3c60de5f 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -688,6 +688,12 @@ __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) #endif /* __NO_STUBS */ #ifdef __KERNEL__ + +#ifndef COMPILE_OFFSETS +#include +#define NR_syscalls (__NR_syscall_max + 1) +#endif + /* * "Conditional" syscalls * diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc47e12..4a6aeedcd96 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -3,6 +3,7 @@ * This code generates raw asm output which is post-processed to extract * and format the required data. */ +#define COMPILE_OFFSETS #include #include -- cgit v1.2.3 From 57421dbbdc932d65f0e6a41ebb027a2bfe3d0669 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 24 Aug 2009 17:40:22 -0400 Subject: tracing: Convert event tracing code to use NR_syscalls Convert the syscalls event tracing code to use NR_syscalls, instead of FTRACE_SYSCALL_MAX. NR_syscalls is standard accross most arches, and reduces code confusion/complexity. Signed-off-by: Jason Baron Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anwin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: <9b4f1a84ecae57cc6599412772efa36f0d2b815b.1251146513.git.jbaron@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 8 ++++---- kernel/trace/trace_syscalls.c | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3cff1214e17..9dbb527e165 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -494,7 +494,7 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) struct syscall_metadata *syscall_nr_to_meta(int nr) { - if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) return NULL; return syscalls_metadata[nr]; @@ -507,7 +507,7 @@ int syscall_name_to_nr(char *name) if (!syscalls_metadata) return -1; - for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + for (i = 0; i < NR_syscalls; i++) { if (syscalls_metadata[i]) { if (!strcmp(syscalls_metadata[i]->name, name)) return i; @@ -533,13 +533,13 @@ static int __init arch_init_ftrace_syscalls(void) unsigned long **psys_syscall_table = &sys_call_table; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - FTRACE_SYSCALL_MAX, GFP_KERNEL); + NR_syscalls, GFP_KERNEL); if (!syscalls_metadata) { WARN_ON(1); return -ENOMEM; } - for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { + for (i = 0; i < NR_syscalls; i++) { meta = find_syscall_meta(psys_syscall_table[i]); syscalls_metadata[i] = meta; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cb7f600cb02..4f5fae6fad9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -11,8 +11,8 @@ static DEFINE_MUTEX(syscall_trace_lock); static int sys_refcount_enter; static int sys_refcount_exit; -static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX); -static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); +static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) @@ -289,7 +289,7 @@ int reg_event_syscall_enter(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) @@ -312,7 +312,7 @@ void unreg_event_syscall_enter(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); sys_refcount_enter--; @@ -330,7 +330,7 @@ int reg_event_syscall_exit(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) @@ -353,7 +353,7 @@ void unreg_event_syscall_exit(void *ptr) name = (char *)ptr; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); sys_refcount_exit--; @@ -373,8 +373,8 @@ struct trace_event event_syscall_exit = { #ifdef CONFIG_EVENT_PROFILE -static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX); -static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); +static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); static int sys_prof_refcount_enter; static int sys_prof_refcount_exit; @@ -420,7 +420,7 @@ int reg_prof_syscall_enter(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -442,7 +442,7 @@ void unreg_prof_syscall_enter(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -481,7 +481,7 @@ int reg_prof_syscall_exit(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -503,7 +503,7 @@ void unreg_prof_syscall_exit(char *name) int num; num = syscall_name_to_nr(name); - if (num < 0 || num >= FTRACE_SYSCALL_MAX) + if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); -- cgit v1.2.3 From 117226d15850387b55fd01675917ee4fcb9699e8 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 24 Aug 2009 17:40:26 -0400 Subject: tracing: Remove FTRACE_SYSCALL_MAX definitions Remove the FTRACE_SYSCALL_MAX definitions now that we have converted the syscall event tracing code to use NR_syscalls. Signed-off-by: Jason Baron Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Josh Stone Cc: Thomas Gleixner Cc: H. Peter Anwin Cc: Hendrik Brueckner Cc: Heiko Carstens LKML-Reference: Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/ftrace.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 71136545187..db24c2278be 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,13 +28,6 @@ #endif -/* FIXME: I don't want to stay hardcoded */ -#ifdef CONFIG_X86_64 -# define FTRACE_SYSCALL_MAX 299 -#else -# define FTRACE_SYSCALL_MAX 337 -#endif - #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ -- cgit v1.2.3