From 77ae365eca895061c8bf2b2e3ae1d9ea62869739 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Mar 2009 11:00:29 -0400 Subject: ring-buffer: make lockless This patch converts the ring buffers into a completely lockless buffer recording system. The read side still takes locks since we still serialize readers. But the writers are the ones that must be lockless (those can happen in NMIs). The main change is to the "head_page" pointer. We write to the tail, and read from the head. The "head_page" pointer in the cpu buffer is now just a reference to where to look. The real head page is now kept in the head_page->list->prev->next pointer. That is, in the list head of the previous page we set flags. The list pages are allocated to be aligned such that the lowest significant bits are always zero pointing to the list. This gives us play to put in flags to their pointers. bit 0: set when the page is a head page bit 1: set when the writer is moving the page (for overwrite mode) cmpxchg is used to update the pointer. When the writer wraps the buffer and the tail meets the head, in overwrite mode, the writer must move the head page forward. It first uses cmpxchg to change the pointer flag from 1 to 2. Once this is done, the reader on another CPU will not take the page from the buffer. The writers need to protect against interrupts (we don't bother with disabling interrupts because NMIs are allowed to write too). After the writer sets the pointer flag to 2, it takes care to manage interrupts coming in. This is discribed in detail within the comments of the code. Changes in version 2: - Let reader reset entries value of header page. - Fix tail page passing commit page on reader page test. - Always increment entries and write counter in rb_tail_page_update - Add safety check in rb_set_commit_to_write to break out of infinite loop - add mask in rb_is_reader_page [ Impact: lock free writing to the ring buffer ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 29f8599e6be..7fca71693ae 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -170,7 +170,6 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, -- cgit v1.2.3 From 68fd60a8c8bca6af51805c45f286f0f2572ac977 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 16 Jul 2009 10:53:34 +0800 Subject: tracing/events: add missing type info of dynamic arrays The format file doesn't contain enough information for __dynamic_array/__string. The type name is missing. Before: # cat format: name: irq_handler_entry ... field:__data_loc name; offset:16; size:2; After: # cat format: name: irq_handler_entry ... field:__data_loc char[] name; offset:16; size:2; Signed-off-by: Lai Jiangshan LKML-Reference: <4A5E962E.9020900@cn.fujitsu.com> Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1867553c61e..cc78943e003 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,7 +120,7 @@ #undef __dynamic_array #define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ @@ -279,7 +279,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __dynamic_array #define __dynamic_array(type, item, len) \ - ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ + ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ sizeof(field.__data_loc_##item), 0); -- cgit v1.2.3 From 7d536cb3fb9993bdcd5a2fbaa6b0670ded4e101c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 16 Jul 2009 10:54:02 +0800 Subject: tracing/events: record the size of dynamic arrays When a dynamic array is defined, we add __data_loc_foo in trace_entry to record the offset of the array, but the size of the array is not recorded, which causes 2 problems: - the event filter just compares the first 2 chars of the strings. - parsers can't parse dynamic arrays. So we encode the size of each dynamic array in the higher 16 bits of __data_loc_foo, while the offset is in lower 16 bits. Signed-off-by: Li Zefan LKML-Reference: <4A5E964A.9000403@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc78943e003..3cbb96ef34f 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -25,7 +25,7 @@ #define __array(type, item, len) type item[len]; #undef __dynamic_array -#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; +#define __dynamic_array(type, item, len) u32 __data_loc_##item; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -51,13 +51,14 @@ * Include the following: * * struct ftrace_data_offsets_ { - * int ; - * int ; + * u32 ; + * u32 ; * [...] * }; * - * The __dynamic_array() macro will create each int , this is + * The __dynamic_array() macro will create each u32 , this is * to keep the offset of each array from the beginning of the event. + * The size of an array is also encoded, in the higher 16 bits of . */ #undef __field @@ -67,7 +68,7 @@ #define __array(type, item, len) #undef __dynamic_array -#define __dynamic_array(type, item, len) int item; +#define __dynamic_array(type, item, len) u32 item; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -207,7 +208,7 @@ ftrace_format_##call(struct trace_seq *s) \ #undef __get_dynamic_array #define __get_dynamic_array(field) \ - ((void *)__entry + __entry->__data_loc_##field) + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) #undef __get_str #define __get_str(field) (char *)__get_dynamic_array(field) @@ -325,6 +326,7 @@ ftrace_define_fields_##call(void) \ #define __dynamic_array(type, item, len) \ __data_offsets->item = __data_size + \ offsetof(typeof(*entry), __data); \ + __data_offsets->item |= (len * sizeof(type)) << 16; \ __data_size += (len) * sizeof(type); #undef __string -- cgit v1.2.3 From 1f9963cbb0280e0cd554161e00f1a0eeddbf1ae1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 20 Jul 2009 10:20:53 +0800 Subject: tracing/filters: improve subsystem filter Currently a subsystem filter should be applicable to all events under the subsystem, and if it failed, all the event filters will be cleared. Those behaviors make subsys filter much less useful: # echo 'vec == 1' > irq/softirq_entry/filter # echo 'irq == 5' > irq/filter bash: echo: write error: Invalid argument # cat irq/softirq_entry/filter none I'd expect it set the filter for irq_handler_entry/exit, and not touch softirq_entry/exit. The basic idea is, try to see if the filter can be applied to which events, and then just apply to the those events: # echo 'vec == 1' > softirq_entry/filter # echo 'irq == 5' > filter # cat irq_handler_entry/filter irq == 5 # cat softirq_entry/filter vec == 1 Changelog for v2: - do some cleanups to address Frederic's comments. Inspired-by: Steven Rostedt Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker LKML-Reference: <4A63D485.7030703@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5c093ffc655..26d3673d514 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -101,6 +101,8 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); +struct event_filter; + struct ftrace_event_call { struct list_head list; char *name; @@ -116,7 +118,7 @@ struct ftrace_event_call { int (*define_fields)(void); struct list_head fields; int filter_active; - void *filter; + struct event_filter *filter; void *mod; #ifdef CONFIG_EVENT_PROFILE -- cgit v1.2.3 From 066e0378c23f0a3db730893f6a041e4a3922a385 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:23 -0400 Subject: tracing: Call arch_init_ftrace_syscalls at boot Call arch_init_ftrace_syscalls at boot, so we can determine early the set of syscalls for the syscall trace events. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/trace/syscall.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 8cfe515cbc4..c55fcce4fbb 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -19,7 +19,6 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern void arch_init_ftrace_syscalls(void); extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern void start_ftrace_syscalls(void); extern void stop_ftrace_syscalls(void); -- cgit v1.2.3 From 63fbdab3157b72467013fe4dcf88c85e45280ef7 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:27 -0400 Subject: tracing: Add DECLARE_TRACE_WITH_CALLBACK() macro Introduce a new 'DECLARE_TRACE_WITH_CALLBACK()' macro, so that tracepoints can associate an external register/unregister function. This prepares for the syscalls tracer conversion to trace events. We will need to perform arch level operations once a syscall event is turned on/off, such as TIF flags setting, hence the need of such specific callbacks. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/tracepoint.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b9dc4ca0246..5984ed04c03 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,8 +60,10 @@ struct tracepoint { * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * An optional set of (un)registration functions can be passed to perform any + * additional (un)registration work. */ -#define DECLARE_TRACE(name, proto, args) \ +#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -71,13 +73,30 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name, (void *)probe); \ + int ret; \ + void (*func)(void) = reg; \ + \ + ret = tracepoint_probe_register(#name, (void *)probe); \ + if (func && !ret) \ + func(); \ + return ret; \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_unregister(#name, (void *)probe);\ + int ret; \ + void (*func)(void) = unreg; \ + \ + ret = tracepoint_probe_unregister(#name, (void *)probe);\ + if (func && !ret) \ + func(); \ + return ret; \ } + +#define DECLARE_TRACE(name, proto, args) \ + DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ + NULL, NULL); + #define DEFINE_TRACE(name) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ @@ -94,7 +113,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE(name, proto, args) \ +#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -108,6 +127,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DECLARE_TRACE(name, proto, args) \ + DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ + NULL, NULL); + #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -- cgit v1.2.3 From a871bd33a6c0bc86fb47cd02ea2650dd43d3d95f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:31 -0400 Subject: tracing: Add syscall tracepoints add two tracepoints in syscall exit and entry path, conditioned on TIF_SYSCALL_FTRACE. Supports the syscall trace event code. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/trace/syscall.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c55fcce4fbb..3951d774de1 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -1,8 +1,28 @@ #ifndef _TRACE_SYSCALL_H #define _TRACE_SYSCALL_H +#include + #include + +extern void syscall_regfunc(void); +extern void syscall_unregfunc(void); + +DECLARE_TRACE_WITH_CALLBACK(syscall_enter, + TP_PROTO(struct pt_regs *regs, long id), + TP_ARGS(regs, id), + syscall_regfunc, + syscall_unregfunc +); + +DECLARE_TRACE_WITH_CALLBACK(syscall_exit, + TP_PROTO(struct pt_regs *regs, long ret), + TP_ARGS(regs, ret), + syscall_regfunc, + syscall_unregfunc +); + /* * A syscall entry in the ftrace syscalls array. * -- cgit v1.2.3 From 69fd4f0eb2ececbf8ade55e31a933e174965745e Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:44 -0400 Subject: tracing: Add ftrace_event_call void * 'data' field add an optional void * pointer to 'ftrace_event_call' that is passed in for regfunc and unregfunc. This prepares for syscall tracepoints creation by passing the name of the syscall we want to trace and then retrieve its number through our arch syscall table. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 5 +++-- include/trace/ftrace.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ac8c6f8cf24..8544f121d9f 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,8 +112,8 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); + int (*regfunc)(void *); + void (*unregfunc)(void *); int id; int (*raw_init)(void); int (*show_format)(struct trace_seq *s); @@ -122,6 +122,7 @@ struct ftrace_event_call { int filter_active; struct event_filter *filter; void *mod; + void *data; atomic_t profile_count; int (*profile_enable)(struct ftrace_event_call *); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 25d3b02a06f..46d81b5e861 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -568,7 +568,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void) \ +static int ftrace_raw_reg_event_##call(void *ptr) \ { \ int ret; \ \ @@ -579,7 +579,7 @@ static int ftrace_raw_reg_event_##call(void) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void) \ +static void ftrace_raw_unreg_event_##call(void *ptr) \ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ -- cgit v1.2.3 From fb34a08c3469b2be9eae626ccb96476b4687b810 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:47 -0400 Subject: tracing: Add trace events for each syscall entry/exit Layer Frederic's syscall tracer on tracepoints. We create trace events via hooking into the SYSCALL_DEFINE macros. This allows us to individually toggle syscall entry and exit points on/off. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/syscalls.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++-- include/trace/syscall.h | 18 +++++++------- 2 files changed, 68 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80de7003d8c..5e5b4d33a31 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -64,6 +64,7 @@ struct perf_counter_attr; #include #include #include +#include #include #include #include @@ -112,6 +113,59 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static struct ftrace_event_call event_enter_##sname; \ + static int init_enter_##sname(void) \ + { \ + int num; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + register_ftrace_event(&event_syscall_enter); \ + INIT_LIST_HEAD(&event_enter_##sname.fields); \ + init_preds(&event_enter_##sname); \ + return 0; \ + } \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_enter_##sname = { \ + .name = "sys_enter"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_enter, \ + .raw_init = init_enter_##sname, \ + .regfunc = reg_event_syscall_enter, \ + .unregfunc = unreg_event_syscall_enter, \ + .data = "sys"#sname, \ + } + +#define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static struct ftrace_event_call event_exit_##sname; \ + static int init_exit_##sname(void) \ + { \ + int num; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + register_ftrace_event(&event_syscall_exit); \ + INIT_LIST_HEAD(&event_exit_##sname.fields); \ + init_preds(&event_exit_##sname); \ + return 0; \ + } \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_exit_##sname = { \ + .name = "sys_exit"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_exit, \ + .raw_init = init_exit_##sname, \ + .regfunc = reg_event_syscall_exit, \ + .unregfunc = unreg_event_syscall_exit, \ + .data = "sys"#sname, \ + } + #define SYSCALL_METADATA(sname, nb) \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ @@ -121,7 +175,9 @@ struct perf_counter_attr; .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ - } + }; \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); #define SYSCALL_DEFINE0(sname) \ static const struct syscall_metadata __used \ @@ -131,8 +187,9 @@ struct perf_counter_attr; .name = "sys_"#sname, \ .nb_args = 0, \ }; \ + SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ asmlinkage long sys_##sname(void) - #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3951d774de1..73fb8b4a995 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -2,6 +2,8 @@ #define _TRACE_SYSCALL_H #include +#include +#include #include @@ -40,15 +42,13 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern struct syscall_metadata *syscall_nr_to_meta(int nr); -extern void start_ftrace_syscalls(void); -extern void stop_ftrace_syscalls(void); -extern void ftrace_syscall_enter(struct pt_regs *regs); -extern void ftrace_syscall_exit(struct pt_regs *regs); -#else -static inline void start_ftrace_syscalls(void) { } -static inline void stop_ftrace_syscalls(void) { } -static inline void ftrace_syscall_enter(struct pt_regs *regs) { } -static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +extern int syscall_name_to_nr(char *name); +extern struct trace_event event_syscall_enter; +extern struct trace_event event_syscall_exit; +extern int reg_event_syscall_enter(void *ptr); +extern void unreg_event_syscall_enter(void *ptr); +extern int reg_event_syscall_exit(void *ptr); +extern void unreg_event_syscall_exit(void *ptr); #endif #endif /* _TRACE_SYSCALL_H */ -- cgit v1.2.3 From 64c12e0444fcc6b75eb49144ba46d43dbdc6bc8f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:53 -0400 Subject: tracing: Add individual syscalls tracepoint id support The current state of syscalls tracepoints generates only one event id for every syscall events. This patch associates an id with each syscall trace event, so that we can identify each syscall trace event using the 'perf' tool. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/syscalls.h | 22 ++++++++++++++++++---- include/trace/syscall.h | 8 ++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5e5b4d33a31..ce4b01c658e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -116,13 +116,20 @@ struct perf_counter_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct ftrace_event_call event_enter_##sname; \ + struct trace_event enter_syscall_print_##sname = { \ + .trace = print_syscall_enter, \ + }; \ static int init_enter_##sname(void) \ { \ - int num; \ + int num, id; \ num = syscall_name_to_nr("sys"#sname); \ if (num < 0) \ return -ENOSYS; \ - register_ftrace_event(&event_syscall_enter); \ + id = register_ftrace_event(&enter_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_enter_##sname.id = id; \ + set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ init_preds(&event_enter_##sname); \ return 0; \ @@ -142,13 +149,20 @@ struct perf_counter_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct ftrace_event_call event_exit_##sname; \ + struct trace_event exit_syscall_print_##sname = { \ + .trace = print_syscall_exit, \ + }; \ static int init_exit_##sname(void) \ { \ - int num; \ + int num, id; \ num = syscall_name_to_nr("sys"#sname); \ if (num < 0) \ return -ENOSYS; \ - register_ftrace_event(&event_syscall_exit); \ + id = register_ftrace_event(&exit_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_exit_##sname.id = id; \ + set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ init_preds(&event_exit_##sname); \ return 0; \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 73fb8b4a995..df628404241 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,23 +32,31 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) + * @enter_id: associated ftrace enter event id + * @exit_id: associated ftrace exit event id */ struct syscall_metadata { const char *name; int nb_args; const char **types; const char **args; + int enter_id; + int exit_id; }; #ifdef CONFIG_FTRACE_SYSCALLS extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern int syscall_name_to_nr(char *name); +void set_syscall_enter_id(int num, int id); +void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); +enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); +enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #endif /* _TRACE_SYSCALL_H */ -- cgit v1.2.3 From f4b5ffccc83c82947f5d9f15d6f1b6edb1b71cd7 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:53:02 -0400 Subject: tracing: Add perf counter support for syscalls tracing The perf counter support is automated for usual trace events. But we have to define specific callbacks for this to handle syscalls trace events Make 'perf stat -e syscalls:sys_enter_blah' work with syscall style tracepoints. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/perf_counter.h | 2 ++ include/linux/syscalls.h | 52 +++++++++++++++++++++++++++++++++++++++++++- include/trace/syscall.h | 7 ++++++ 3 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a9d823a93fe..8e6460fb4c0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -734,6 +734,8 @@ extern int sysctl_perf_counter_mlock; extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); +extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ce4b01c658e..5541e75e140 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -98,6 +98,53 @@ struct perf_counter_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_EVENT_PROFILE +#define TRACE_SYS_ENTER_PROFILE(sname) \ +static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ + ret = reg_prof_syscall_enter("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ + unreg_prof_syscall_enter("sys"#sname); \ +} + +#define TRACE_SYS_EXIT_PROFILE(sname) \ +static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ + ret = reg_prof_syscall_exit("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ +{ \ + if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ + unreg_prof_syscall_exit("sys"#sname); \ +} + +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysenter_enable_##sname, \ + .profile_disable = prof_sysenter_disable_##sname, + +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysexit_enable_##sname, \ + .profile_disable = prof_sysexit_disable_##sname, +#else +#define TRACE_SYS_ENTER_PROFILE(sname) +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) +#define TRACE_SYS_EXIT_PROFILE(sname) +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) +#endif + #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a #define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) @@ -113,7 +160,6 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) - #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ @@ -134,6 +180,7 @@ struct perf_counter_attr; init_preds(&event_enter_##sname); \ return 0; \ } \ + TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -145,6 +192,7 @@ struct perf_counter_attr; .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ + TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -167,6 +215,7 @@ struct perf_counter_attr; init_preds(&event_exit_##sname); \ return 0; \ } \ + TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -178,6 +227,7 @@ struct perf_counter_attr; .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ + TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } #define SYSCALL_METADATA(sname, nb) \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index df628404241..3ab6dd18fa3 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -57,6 +57,13 @@ extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); +#endif +#ifdef CONFIG_EVENT_PROFILE +int reg_prof_syscall_enter(char *name); +void unreg_prof_syscall_enter(char *name); +int reg_prof_syscall_exit(char *name); +void unreg_prof_syscall_exit(char *name); + #endif #endif /* _TRACE_SYSCALL_H */ -- cgit v1.2.3 From e8f9f4d79a677f55c8ec3acbe87b33a87e2df0de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2009 17:42:52 +0200 Subject: tracing: Add ftrace event call parameter to its field descriptor handler Add the struct ftrace_event_call as a parameter of its show_format() callback. This way we can use it from the syscall trace events to retrieve the syscall name from the ftrace event call parameter and describe its fields using the syscalls metadata. Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/ftrace_event.h | 3 ++- include/trace/ftrace.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8544f121d9f..189806b6e69 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -116,7 +116,8 @@ struct ftrace_event_call { void (*unregfunc)(void *); int id; int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); + int (*show_format)(struct ftrace_event_call *call, + struct trace_seq *s); int (*define_fields)(void); struct list_head fields; int filter_active; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 46d81b5e861..b250b061657 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -151,7 +151,8 @@ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ -- cgit v1.2.3 From dc4ddb4c0b7348f1c9759ae8a9e7d734dc1cda82 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2009 19:03:54 +0200 Subject: tracing: Add fields format definition for syscall events Define the format of the syscall trace fields to parse the binary values from a raw trace using the syscall events "format" file. This is defined dynamically using the syscalls metadata. It prepares the export of syscall event raw records to perf counters. Example: $ cat /debug/tracing/events/syscalls/sys_enter_sched_getparam/format name: sys_enter_sched_getparam ID: 39 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; field:unsigned char common_preempt_count; offset:3; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:pid_t pid; offset:12; size:8; field:struct sched_param * param; offset:20; size:8; print fmt: "pid: 0x%08lx, param: 0x%08lx", ((unsigned long)(REC->pid)), ((unsigned long)(REC->param)) Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/syscalls.h | 1 + include/trace/syscall.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5541e75e140..87d06c173dd 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -189,6 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ + .show_format = ftrace_format_syscall, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3ab6dd18fa3..0cb03625edd 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -55,6 +55,8 @@ extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -- cgit v1.2.3 From 7ead8b8313d92b3a69a1a61b0dcbc4cd66c960dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 Aug 2009 16:56:28 +0800 Subject: tracing/events: Add module tracepoints Add trace points to trace module_load, module_free, module_get, module_put and module_request, and use trace_event facility to get the trace output. Here's the sample output: TASK-PID CPU# TIMESTAMP FUNCTION | | | | | <...>-42 [000] 1.758380: module_request: fb0 wait=1 call_site=fb_open ... <...>-60 [000] 3.269403: module_load: scsi_wait_scan <...>-60 [000] 3.269432: module_put: scsi_wait_scan call_site=sys_init_module refcnt=0 <...>-61 [001] 3.273168: module_free: scsi_wait_scan ... <...>-1021 [000] 13.836081: module_load: sunrpc <...>-1021 [000] 13.840589: module_put: sunrpc call_site=sys_init_module refcnt=-1 <...>-1027 [000] 13.848098: module_get: sunrpc call_site=try_module_get refcnt=0 <...>-1027 [000] 13.848308: module_get: sunrpc call_site=get_filesystem refcnt=1 <...>-1027 [000] 13.848692: module_put: sunrpc call_site=put_filesystem refcnt=0 ... modprobe-2587 [001] 1088.437213: module_load: trace_events_sample F modprobe-2587 [001] 1088.437786: module_put: trace_events_sample call_site=sys_init_module refcnt=0 Note: - the taints flag can be 'F', 'C' and/or 'P' if mod->taints != 0 - the module refcnt is percpu, so it can be negative in a specific cpu Signed-off-by: Li Zefan Acked-by: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell LKML-Reference: <4A891B3C.5030608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/module.h | 14 ++++- include/trace/events/module.h | 126 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 include/trace/events/module.h (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfac..f8f92d015ef 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include #include #include -#include +#include #include +#include + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); diff --git a/include/trace/events/module.h b/include/trace/events/module.h new file mode 100644 index 00000000000..84160fb1847 --- /dev/null +++ b/include/trace/events/module.h @@ -0,0 +1,126 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM module + +#if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MODULE_H + +#include + +#ifdef CONFIG_MODULES + +struct module; + +#define show_module_flags(flags) __print_flags(flags, "", \ + { (1UL << TAINT_PROPRIETARY_MODULE), "P" }, \ + { (1UL << TAINT_FORCED_MODULE), "F" }, \ + { (1UL << TAINT_CRAP), "C" }) + +TRACE_EVENT(module_load, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __field( unsigned int, taints ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->taints = mod->taints; + __assign_str(name, mod->name); + ), + + TP_printk("%s %s", __get_str(name), show_module_flags(__entry->taints)) +); + +TRACE_EVENT(module_free, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __string( name, mod->name ) + ), + + TP_fast_assign( + __assign_str(name, mod->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(module_get, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_put, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_request, + + TP_PROTO(char *name, bool wait, unsigned long ip), + + TP_ARGS(name, wait, ip), + + TP_STRUCT__entry( + __field( bool, wait ) + __field( unsigned long, ip ) + __string( name, name ) + ), + + TP_fast_assign( + __entry->wait = wait; + __entry->ip = ip; + __assign_str(name, name); + ), + + TP_printk("%s wait=%d call_site=%pf", + __get_str(name), (int)__entry->wait, (void *)__entry->ip) +); + +#endif /* CONFIG_MODULES */ + +#endif /* _TRACE_MODULE_H */ + +/* This part must be outside protection */ +#include + -- cgit v1.2.3 From 10a5b66f625904ad5a2867cf7a28073e1236ff32 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:53:05 +0800 Subject: tracing/syscalls: Add fields format for exit events Add "format" file for syscall exit events: # cat events/syscalls/sys_exit_open/format name: sys_exit_open ID: 344 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; field:unsigned char common_preempt_count; offset:3; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:int nr; offset:12; size:4; field:unsigned long ret; offset:16; size:4; Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAF61.3060307@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 3 ++- include/trace/syscall.h | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 87d06c173dd..8d57f77794e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -189,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ - .show_format = ftrace_format_syscall, \ + .show_format = syscall_enter_format, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ @@ -225,6 +225,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_exit, \ .raw_init = init_exit_##sname, \ + .show_format = syscall_exit_format, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 0cb03625edd..5ce85d75d31 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -55,8 +55,10 @@ extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); -extern int -ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); +extern int syscall_enter_format(struct ftrace_event_call *call, + struct trace_seq *s); +extern int syscall_exit_format(struct ftrace_event_call *call, + struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -- cgit v1.2.3 From 14be96c9716cb8c46dca94bd890defd7856e0734 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:53:52 +0800 Subject: tracing/events: Add ftrace_event_call param to define_fields() This parameter is needed by syscall events to add define_fields() handler. Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAF90.6060801@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/trace/ftrace.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 189806b6e69..35b3a4a5ba8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -118,7 +118,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*show_format)(struct ftrace_event_call *call, struct trace_seq *s); - int (*define_fields)(void); + int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; struct event_filter *filter; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b250b061657..4e81c9b3751 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -294,10 +294,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ -ftrace_define_fields_##call(void) \ +ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ __common_field(int, type, 1); \ -- cgit v1.2.3 From e647d6b314266adb904d4b84973eda0afa856946 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:54:32 +0800 Subject: tracing/events: Add trace_define_common_fields() Extract duplicate code. Also prepare for the later patch. Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAFB8.1010304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 8 +------- include/trace/ftrace.h | 8 +++----- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 35b3a4a5ba8..427cbae47f8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -142,6 +142,7 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, extern int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size, int is_signed); +extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) @@ -166,11 +167,4 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item, is_signed) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item), is_signed); \ - if (ret) \ - return ret; - #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4e81c9b3751..127400255e4 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -299,11 +299,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct ftrace_raw_##call field; \ int ret; \ \ - __common_field(int, type, 1); \ - __common_field(unsigned char, flags, 0); \ - __common_field(unsigned char, preempt_count, 0); \ - __common_field(int, pid, 1); \ - __common_field(int, tgid, 1); \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return ret; \ \ tstruct; \ \ -- cgit v1.2.3 From 540b7b8d65575c80162f2a0f38e1d313c92a6042 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:54:51 +0800 Subject: tracing/syscalls: Add filtering support Add filtering support for syscall events: # echo 'mode == 0666' > events/syscalls/sys_enter_open # echo 'ret == 0' > events/syscalls/sys_exit_open # echo 1 > events/syscalls/sys_enter_open # echo 1 > events/syscalls/sys_exit_open # cat trace ... modprobe-3084 [001] 117.463140: sys_open(filename: 917d3e8, flags: 0, mode: 1b6) modprobe-3084 [001] 117.463176: sys_open -> 0x0 less-3086 [001] 117.510455: sys_open(filename: 9c6bdb8, flags: 8000, mode: 1b6) sendmail-2574 [001] 122.145840: sys_open(filename: b807a365, flags: 0, mode: 1b6) ... Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAFCB.1040006@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 5 +++-- include/linux/syscalls.h | 16 +++++++++++----- include/trace/syscall.h | 7 +++++++ 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 427cbae47f8..df5b085c415 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -140,8 +140,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed); +extern int trace_define_field(struct ftrace_event_call *call, + const char *type, const char *name, + int offset, int size, int is_signed); extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8d57f77794e..f124c899555 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -190,6 +190,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ + .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ @@ -226,6 +227,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .event = &event_syscall_exit, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ + .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ @@ -233,6 +235,8 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ } #define SYSCALL_METADATA(sname, nb) \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ @@ -241,20 +245,22 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ - }; \ - SYSCALL_TRACE_ENTER_EVENT(sname); \ - SYSCALL_TRACE_EXIT_EVENT(sname); + .enter_event = &event_enter_##sname, \ + .exit_event = &event_exit_##sname, \ + }; #define SYSCALL_DEFINE0(sname) \ + SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ __syscall_meta_##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ + .enter_event = &event_enter__##sname, \ + .exit_event = &event_exit__##sname, \ }; \ - SYSCALL_TRACE_ENTER_EVENT(_##sname); \ - SYSCALL_TRACE_EXIT_EVENT(_##sname); \ asmlinkage long sys_##sname(void) #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5ce85d75d31..9661dd406b9 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,6 +34,8 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, * @args: list of args as strings (args[i] matches types[i]) * @enter_id: associated ftrace enter event id * @exit_id: associated ftrace exit event id + * @enter_event: associated syscall_enter trace event + * @exit_event: associated syscall_exit trace event */ struct syscall_metadata { const char *name; @@ -42,6 +44,9 @@ struct syscall_metadata { const char **args; int enter_id; int exit_id; + + struct ftrace_event_call *enter_event; + struct ftrace_event_call *exit_event; }; #ifdef CONFIG_FTRACE_SYSCALLS @@ -59,6 +64,8 @@ extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); +extern int syscall_enter_define_fields(struct ftrace_event_call *call); +extern int syscall_exit_define_fields(struct ftrace_event_call *call); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif -- cgit v1.2.3 From 3d27d8cb34fc156beb86de2338ca4029873a5cc6 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:12 -0700 Subject: tracing: Make syscall tracepoints conditional The syscall enter/exit tracepoints are only supported on archs that HAVE_SYSCALL_TRACEPOINTS, so the declarations should be #ifdef'ed. Also, the definition of syscall_regfunc and syscall_unregfunc should depend on this same config, rather than the ftrace-specific one. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan LKML-Reference: <1251150194-1713-3-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- include/trace/syscall.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 9661dd406b9..5dcb7e3a544 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -8,6 +8,8 @@ #include +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + extern void syscall_regfunc(void); extern void syscall_unregfunc(void); @@ -25,6 +27,8 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, syscall_unregfunc ); +#endif + /* * A syscall entry in the ftrace syscalls array. * -- cgit v1.2.3 From 97419875865859fd2403e66266c02ce028e2f5ab Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:13 -0700 Subject: tracing: Move tracepoint callbacks from declaration to definition It's not strictly correct for the tracepoint reg/unreg callbacks to occur when a client is hooking up, because the actual tracepoint may not be present yet. This happens to be fine for syscall, since that's in the core kernel, but it would cause problems for tracepoints defined in a module that hasn't been loaded yet. It also means the reg/unreg has to be EXPORTed for any modules to use the tracepoint (as in SystemTap). This patch removes DECLARE_TRACE_WITH_CALLBACK, and instead introduces DEFINE_TRACE_FN which stores the callbacks in struct tracepoint. The callbacks are used now when the active state of the tracepoint changes in set_tracepoint & disable_tracepoint. This also introduces TRACE_EVENT_FN, so ftrace events can also provide registration callbacks if needed. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-4-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/tracepoint.h | 46 +++++++++++++++++--------------------------- include/trace/define_trace.h | 5 +++++ include/trace/ftrace.h | 9 +++++++++ include/trace/syscall.h | 12 ++++-------- 4 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5984ed04c03..846a4ae501e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -23,6 +23,8 @@ struct tracepoint; struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ + void (*regfunc)(void); + void (*unregfunc)(void); void **funcs; } __attribute__((aligned(32))); /* * Aligned on 32 bytes because it is @@ -60,10 +62,8 @@ struct tracepoint { * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. - * An optional set of (un)registration functions can be passed to perform any - * additional (un)registration work. */ -#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ +#define DECLARE_TRACE(name, proto, args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -73,36 +73,23 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - int ret; \ - void (*func)(void) = reg; \ - \ - ret = tracepoint_probe_register(#name, (void *)probe); \ - if (func && !ret) \ - func(); \ - return ret; \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - int ret; \ - void (*func)(void) = unreg; \ - \ - ret = tracepoint_probe_unregister(#name, (void *)probe);\ - if (func && !ret) \ - func(); \ - return ret; \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } -#define DECLARE_TRACE(name, proto, args) \ - DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ - NULL, NULL); - -#define DEFINE_TRACE(name) \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(32))) = \ - { __tpstrtab_##name, 0, NULL } + { __tpstrtab_##name, 0, reg, unreg, NULL } + +#define DEFINE_TRACE(name) \ + DEFINE_TRACE_FN(name, NULL, NULL); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name) @@ -113,7 +100,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -127,10 +114,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } -#define DECLARE_TRACE(name, proto, args) \ - DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ - NULL, NULL); - +#define DEFINE_TRACE_FN(name, reg, unreg) #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) @@ -282,10 +266,16 @@ static inline void tracepoint_synchronize_unregister(void) * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in * /sys/kernel/debug/tracing/events/. + * + * A set of (un)registration functions can be passed to the variant + * TRACE_EVENT_FN to perform any (un)registration work. */ #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FN(name, proto, args, struct, \ + assign, print, reg, unreg) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif #endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f9..2a969850736 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,6 +26,11 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 127400255e4..3a0b44bdabf 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -42,6 +42,15 @@ }; \ static struct ftrace_event_call event_##name +/* Callbacks are meaningless to ftrace. */ +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT(name, TP_PROTO(proto), TP_ARGS(args), \ + TP_STRUCT__entry(tstruct), \ + TP_fast_assign(assign), \ + TP_printk(print)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dcb7e3a544..4e194300185 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -13,18 +13,14 @@ extern void syscall_regfunc(void); extern void syscall_unregfunc(void); -DECLARE_TRACE_WITH_CALLBACK(syscall_enter, +DECLARE_TRACE(syscall_enter, TP_PROTO(struct pt_regs *regs, long id), - TP_ARGS(regs, id), - syscall_regfunc, - syscall_unregfunc + TP_ARGS(regs, id) ); -DECLARE_TRACE_WITH_CALLBACK(syscall_exit, +DECLARE_TRACE(syscall_exit, TP_PROTO(struct pt_regs *regs, long ret), - TP_ARGS(regs, ret), - syscall_regfunc, - syscall_unregfunc + TP_ARGS(regs, ret) ); #endif -- cgit v1.2.3 From 1c569f0264ea629c10bbab471dd0626ce4d3f19f Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:14 -0700 Subject: tracing: Create generic syscall TRACE_EVENTs This converts the syscall_enter/exit tracepoints into TRACE_EVENTs, so you can have generic ftrace events that capture all system calls with arguments and return values. These generic events are also renamed to sys_enter/exit, so they're more closely aligned to the specific sys_enter_foo events. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-5-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- include/trace/events/syscalls.h | 70 +++++++++++++++++++++++++++++++++++++++++ include/trace/syscall.h | 17 ---------- 2 files changed, 70 insertions(+), 17 deletions(-) create mode 100644 include/trace/events/syscalls.h (limited to 'include') diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h new file mode 100644 index 00000000000..397dff2dbd5 --- /dev/null +++ b/include/trace/events/syscalls.h @@ -0,0 +1,70 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM syscalls + +#if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENTS_SYSCALLS_H + +#include + +#include +#include + + +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + +extern void syscall_regfunc(void); +extern void syscall_unregfunc(void); + +TRACE_EVENT_FN(sys_enter, + + TP_PROTO(struct pt_regs *regs, long id), + + TP_ARGS(regs, id), + + TP_STRUCT__entry( + __field( long, id ) + __array( unsigned long, args, 6 ) + ), + + TP_fast_assign( + __entry->id = id; + syscall_get_arguments(current, regs, 0, 6, __entry->args); + ), + + TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", + __entry->id, + __entry->args[0], __entry->args[1], __entry->args[2], + __entry->args[3], __entry->args[4], __entry->args[5]), + + syscall_regfunc, syscall_unregfunc +); + +TRACE_EVENT_FN(sys_exit, + + TP_PROTO(struct pt_regs *regs, long ret), + + TP_ARGS(regs, ret), + + TP_STRUCT__entry( + __field( long, id ) + __field( long, ret ) + ), + + TP_fast_assign( + __entry->id = syscall_get_nr(current, regs); + __entry->ret = ret; + ), + + TP_printk("NR %ld = %ld", + __entry->id, __entry->ret), + + syscall_regfunc, syscall_unregfunc +); + +#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ + +#endif /* _TRACE_EVENTS_SYSCALLS_H */ + +/* This part must be outside protection */ +#include + diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 4e194300185..5dc283ba5ae 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -8,23 +8,6 @@ #include -#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS - -extern void syscall_regfunc(void); -extern void syscall_unregfunc(void); - -DECLARE_TRACE(syscall_enter, - TP_PROTO(struct pt_regs *regs, long id), - TP_ARGS(regs, id) -); - -DECLARE_TRACE(syscall_exit, - TP_PROTO(struct pt_regs *regs, long ret), - TP_ARGS(regs, ret) -); - -#endif - /* * A syscall entry in the ftrace syscalls array. * -- cgit v1.2.3 From f0693c8bd5c50380b299e19d19e7640024640b42 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Aug 2009 14:59:32 -0400 Subject: tracing/sched: show CPU task wakes up on in trace event While debugging the scheduler push / pull algorithm, I found it very annoying that the sched wake up events did not show the CPU that the task was waking on. In order to analyze the scheduler, I needed that information. This patch adds recording of the CPU that a task is waking up on. Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8949bb7eb08..a581ef211ff 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -94,6 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) + __field( int, cpu ) ), TP_fast_assign( @@ -101,11 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; + __entry->cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d", + TP_printk("task %s:%d [%d] success=%d [%03d]", __entry->comm, __entry->pid, __entry->prio, - __entry->success) + __entry->success, __entry->cpu) ); /* @@ -125,6 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) + __field( int, cpu ) ), TP_fast_assign( @@ -132,11 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; + __entry->cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d", + TP_printk("task %s:%d [%d] success=%d [%03d]", __entry->comm, __entry->pid, __entry->prio, - __entry->success) + __entry->success, __entry->cpu) ); /* -- cgit v1.2.3 From 43b51ead3f752a3935116e5b1a94254b8573734f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Aug 2009 10:33:22 +0800 Subject: tracing/filters: Add __field_ext() to TRACE_EVENT Add __field_ext(), so a field can be assigned to a specific filter_type, which matches a corresponding filter function. For example, a later patch will allow this: __field_ext(const char *, str, FILTER_PTR_STR); Signed-off-by: Li Zefan LKML-Reference: <4A7B9272.6050709@cn.fujitsu.com> [ Fixed a -1 to FILTER_OTHER Forward ported to latest kernel. ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 9 ++++++++- include/trace/ftrace.h | 31 ++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index df5b085c415..0440bea8f6b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -140,9 +140,16 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); +enum { + FILTER_OTHER = 0, + FILTER_STATIC_STRING, + FILTER_DYN_STRING, +}; + extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, - int offset, int size, int is_signed); + int offset, int size, int is_signed, + int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 127400255e4..1b1f742a604 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -21,6 +21,9 @@ #undef __field #define __field(type, item) type item; +#undef __field_ext +#define __field_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; @@ -62,7 +65,10 @@ */ #undef __field -#define __field(type, item); +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) #undef __array #define __array(type, item, len) @@ -110,6 +116,9 @@ if (!ret) \ return 0; +#undef __field_ext +#define __field_ext(type, item, filter_type) __field(type, item) + #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ @@ -265,28 +274,33 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef __field -#define __field(type, item) \ +#undef __field_ext +#define __field_ext(type, item, filter_type) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ + sizeof(field.item), \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; +#undef __field +#define __field(type, item) __field_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0); \ + sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ - offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0); + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0, \ + FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -320,6 +334,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __field #define __field(type, item) +#undef __field_ext +#define __field_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) -- cgit v1.2.3 From 87a342f5db69d53ea70493bb1ec69c9047677038 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Aug 2009 10:33:43 +0800 Subject: tracing/filters: Support filtering for char * strings Usually, char * entries are dangerous in traces because the string can be released whereas a pointer to it can still wait to be read from the ring buffer. But sometimes we can assume it's safe, like in case of RO data (eg: __file__ or __line__, used in bkl trace event). If these RO data are in a module and so is the call to the trace event, then it's safe, because the ring buffer will be flushed once this module get unloaded. To allow char * to be treated as a string: TRACE_EVENT(..., TP_STRUCT__entry( __field_ext(const char *, name, FILTER_PTR_STRING) ... ) ... ); The filtering will not dereference "char *" unless the developer explicitly sets FILTER_PTR_STR in __field_ext. Signed-off-by: Li Zefan LKML-Reference: <4A7B9287.90205@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0440bea8f6b..ace2da9e0a0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -144,6 +144,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_PTR_STRING, }; extern int trace_define_field(struct ftrace_event_call *call, -- cgit v1.2.3 From 5ac35daa9343936038a3c9c4f4d6d3fe6a2a7bd8 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 25 Aug 2009 14:06:22 +0800 Subject: tracing/events: fix the include file dependencies The TRACE_EVENT depends on the include/linux/tracepoint.h first and include/trace/ftrace.h later, if we include the ftrace.h early, a building error will occur. Both define TRACE_EVENT in trace_a.h and trace_b.h, if we include those in .c file, like this: #define CREATE_TRACE_POINTS include include The above will not work, because the TRACE_EVENT was re-defined by the previous .h file. Reported-by: Wei Yongjun Signed-off-by: Xiao Guangrong LKML-Reference: <4A937F5E.3020802@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 +-- include/trace/define_trace.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5984ed04c03..81709854f7a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -180,6 +180,7 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args +#endif #ifndef TRACE_EVENT /* @@ -287,5 +288,3 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif - -#endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f9..cd150b9d8e3 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,6 +56,7 @@ #include #endif +#undef TRACE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ -- cgit v1.2.3 From 7cb2e3ee2aeec5b83ecadba929a2dc575dd4008f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Aug 2009 00:32:37 -0400 Subject: tracing: add comments to explain TRACE_EVENT out of protection The commit: commit 5ac35daa9343936038a3c9c4f4d6d3fe6a2a7bd8 Author: Xiao Guangrong tracing/events: fix the include file dependencies Moved the TRACE_EVENT out of the ifdef protection of tracepoints.h but uses the define of TRACE_EVENT itself as protection. This patch adds comments to explain why. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 81709854f7a..0341f2e2698 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -180,7 +180,15 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args -#endif + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT outside the include file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ #ifndef TRACE_EVENT /* @@ -287,4 +295,5 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif + +#endif /* ifdef TRACE_EVENT (see note above) */ -- cgit v1.2.3 From 6c347d43eea29221a8ebab9ff9cbe7a00cddac98 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Aug 2009 18:17:34 +0200 Subject: tracing: Undef TRACE_EVENT_FN between trace events headers inclusion The recent commit: tracing/events: fix the include file dependencies fixed a file dependency problem while including more than one trace event header file. This fix undefined TRACE_EVENT after an event header macro preprocessing in order to make tracepoint.h able to correctly declare the tracepoints necessary for the next event header file. But now we also need to undefine TRACE_EVENT_FN at the end of an event header file preprocessing for the same reason. This fixes the following build error: In file included from include/trace/events/napi.h:5, from net/core/net-traces.c:28: include/linux/tracepoint.h:285:1: warning: "TRACE_EVENT_FN" redefined In file included from include/trace/define_trace.h:61, from include/trace/events/skb.h:40, from net/core/net-traces.c:27: include/trace/ftrace.h:50:1: warning: this is the location of the previous definition In file included from include/trace/events/napi.h:5, from net/core/net-traces.c:28: include/linux/tracepoint.h:285:1: warning: "TRACE_EVENT_FN" redefined In file included from include/trace/define_trace.h:61, from include/trace/events/skb.h:40, from net/core/net-traces.c:27: include/trace/ftrace.h:50:1: warning: this is the location of the previous definition Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Xiao Guangrong Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <20090827161732.GA7618@nowhere> Signed-off-by: Ingo Molnar --- include/trace/define_trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index a89ed590597..2a4b3bf7403 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -62,6 +62,7 @@ #endif #undef TRACE_EVENT +#undef TRACE_EVENT_FN #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ -- cgit v1.2.3 From 0dd7b74787eaf7858c6c573353a83c3e2766e674 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Aug 2009 00:50:06 +0200 Subject: tracing: Fix double CPP substitution in TRACE_EVENT_FN TRACE_EVENT_FN relays on TRACE_EVENT by reprocessing its parameters into the ftrace events CPP macro. This leads to a double substitution in some cases. For example, a bad consequence is a format always prefixed by "%s, %s\n" for every TRACE_EVENT_FN based events. Eg: cat /debug/tracing/events/syscalls/sys_enter/format [...] print fmt: "%s, %s\n", "\"NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)\"",\ "REC->id, REC->args[0], REC->args[1], REC->args[2], REC->args[3],\ REC->args[4], REC->args[5]" This creates a failure in post-processing tools such as perf trace or trace-cmd. Then drop this double substitution and replace it by a new __cpparg() macro that relays CPP arguments containing commas. Signed-off-by: Frederic Weisbecker Cc: Josh Stone Cc: Li Zefan Cc: Steven Rostedt Cc: Jason Baron LKML-Reference: <1251413406-6704-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77ad79e..57c56a998ee 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -45,14 +45,15 @@ }; \ static struct ftrace_event_call event_##name +#undef __cpparg +#define __cpparg(arg...) arg + /* Callbacks are meaningless to ftrace. */ #undef TRACE_EVENT_FN -#define TRACE_EVENT_FN(name, proto, args, tstruct, \ - assign, print, reg, unreg) \ - TRACE_EVENT(name, TP_PROTO(proto), TP_ARGS(args), \ - TP_STRUCT__entry(tstruct), \ - TP_fast_assign(assign), \ - TP_printk(print)) +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT(name, __cpparg(proto), __cpparg(args), \ + __cpparg(tstruct), __cpparg(assign), __cpparg(print)) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3 From 8e254c1d183f0225ad21f9049641529e56cce4da Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 31 Aug 2009 16:49:41 +0800 Subject: tracing/filters: Defer pred allocation init_preds() allocates about 5392 bytes of memory (on x86_32) for a TRACE_EVENT. With my config, at system boot total memory occupied is: 5392 * (642 + 15) == 3459KB 642 == cat available_events | wc -l 15 == number of dirs in events/ftrace That's quite a lot, so we'd better defer memory allocation util it's needed, that's when filter is used. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi Cc: Masami Hiramatsu LKML-Reference: <4A9B8EA5.6020700@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 1 - include/linux/syscalls.h | 2 -- include/trace/ftrace.h | 1 - 3 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ace2da9e0a0..755480484eb 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -133,7 +133,6 @@ struct ftrace_event_call { #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 -extern int init_preds(struct ftrace_event_call *call); extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern int filter_current_check_discard(struct ftrace_event_call *call, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f124c899555..a8e37821cc6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -177,7 +177,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ event_enter_##sname.id = id; \ set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ - init_preds(&event_enter_##sname); \ return 0; \ } \ TRACE_SYS_ENTER_PROFILE(sname); \ @@ -214,7 +213,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ event_exit_##sname.id = id; \ set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ - init_preds(&event_exit_##sname); \ return 0; \ } \ TRACE_SYS_EXIT_PROFILE(sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 57c56a998ee..bfbc842600a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -622,7 +622,6 @@ static int ftrace_raw_init_event_##call(void) \ return -ENODEV; \ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ return 0; \ } \ \ -- cgit v1.2.3 From dc892f7339af2d125478b800edb9081d6149665b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Sep 2009 15:33:41 -0400 Subject: ring-buffer: remove ring_buffer_event_discard The function ring_buffer_event_discard can be used on any item in the ring buffer, even after the item was committed. This function provides no safety nets and is very race prone. An item may be safely removed from the ring buffer before it is committed with the ring_buffer_discard_commit. Since there are currently no users of this function, and because this function is racey and error prone, this patch removes it altogether. Note, removing this function also allows the counters to ignore all discarded events (patches will follow). Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7fca71693ae..e061b4ecdc3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -74,20 +74,6 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } -/* - * ring_buffer_event_discard can discard any event in the ring buffer. - * it is up to the caller to protect against a reader from - * consuming it or a writer from wrapping and replacing it. - * - * No external protection is needed if this is called before - * the event is commited. But in that case it would be better to - * use ring_buffer_discard_commit. - * - * Note, if an event that has not been committed is discarded - * with ring_buffer_event_discard, it must still be committed. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event); - /* * ring_buffer_discard_commit will remove an event that has not * ben committed yet. If this is used, then ring_buffer_unlock_commit -- cgit v1.2.3 From e77405ad80f53966524b5c31244e13fbbbecbd84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 2 Sep 2009 14:17:06 -0400 Subject: tracing: pass around ring buffer instead of tracer The latency tracers (irqsoff and wakeup) can swap trace buffers on the fly. If an event is happening and has reserved data on one of the buffers, and the latency tracer swaps the global buffer with the max buffer, the result is that the event may commit the data to the wrong buffer. This patch changes the API to the trace recording to be recieve the buffer that was used to reserve a commit. Then this buffer can be passed in to the commit. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 15 ++++++++++----- include/trace/ftrace.h | 15 ++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 755480484eb..23f7179bf74 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -93,13 +93,17 @@ void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); struct ring_buffer_event * -trace_current_buffer_lock_reserve(int type, unsigned long len, +trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, + int type, unsigned long len, unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, +void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_current_buffer_discard_commit(struct ring_buffer_event *event); +void trace_current_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); @@ -135,7 +139,8 @@ struct ftrace_event_call { extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); -extern int filter_current_check_discard(struct ftrace_event_call *call, +extern int filter_current_check_discard(struct ring_buffer *buffer, + struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index bfbc842600a..308bafd9332 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -460,13 +460,15 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * { * struct ring_buffer_event *event; * struct ftrace_raw_ *entry; <-- defined in stage 1 + * struct ring_buffer *buffer; * unsigned long irq_flags; * int pc; * * local_save_flags(irq_flags); * pc = preempt_count(); * - * event = trace_current_buffer_lock_reserve(event_.id, + * event = trace_current_buffer_lock_reserve(&buffer, + * event_.id, * sizeof(struct ftrace_raw_), * irq_flags, pc); * if (!event) @@ -476,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * ; <-- Here we assign the entries by the __field and * __array macros. * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); * } * * static int ftrace_raw_reg_event_(void) @@ -568,6 +570,7 @@ static void ftrace_raw_event_##call(proto) \ struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ + struct ring_buffer *buffer; \ unsigned long irq_flags; \ int __data_size; \ int pc; \ @@ -577,7 +580,8 @@ static void ftrace_raw_event_##call(proto) \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ - event = trace_current_buffer_lock_reserve(event_##call.id, \ + event = trace_current_buffer_lock_reserve(&buffer, \ + event_##call.id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -589,8 +593,9 @@ static void ftrace_raw_event_##call(proto) \ \ { assign; } \ \ - if (!filter_current_check_discard(event_call, entry, event)) \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ + if (!filter_current_check_discard(buffer, event_call, entry, event)) \ + trace_nowake_buffer_unlock_commit(buffer, \ + event, irq_flags, pc); \ } \ \ static int ftrace_raw_reg_event_##call(void *ptr) \ -- cgit v1.2.3 From 85bac32c4a52c592b857f2c360cc5ec93a097d70 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 4 Sep 2009 14:24:40 -0400 Subject: ring-buffer: only enable ring_buffer_swap_cpu when needed Since the ability to swap the cpu buffers adds a small overhead to the recording of a trace, we only want to add it when needed. Only the irqsoff and preemptoff tracers use this feature, and both are not recommended for production kernels. This patch disables its use when neither irqsoff nor preemptoff is configured. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e061b4ecdc3..5fcc31ed577 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -140,8 +140,17 @@ unsigned long ring_buffer_size(struct ring_buffer *buffer); void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_reset(struct ring_buffer *buffer); +#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, struct ring_buffer *buffer_b, int cpu); +#else +static inline int +ring_buffer_swap_cpu(struct ring_buffer *buffer_a, + struct ring_buffer *buffer_b, int cpu) +{ + return -ENODEV; +} +#endif int ring_buffer_empty(struct ring_buffer *buffer); int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); -- cgit v1.2.3