From c47956d9ae3341d2d1998bff26620fa3338c01e4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:11 -0500 Subject: ftrace: remove obsolete print continue functionality Impact: cleanup, remove obsolete code Now that the ring buffer used by ftrace allows for variable length entries, we do not need the 'cont' feature of the buffer. This code makes other parts of ftrace more complex and by removing this it simplifies the ftrace code. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f86403..3a357382cce 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -16,7 +16,6 @@ enum trace_type { TRACE_FN, TRACE_CTX, TRACE_WAKE, - TRACE_CONT, TRACE_STACK, TRACE_PRINT, TRACE_SPECIAL, @@ -178,7 +177,6 @@ struct trace_power { * NEED_RESCED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler - * CONT - multiple entries hold the trace item */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, @@ -186,7 +184,6 @@ enum trace_flag_type { TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_CONT = 0x20, }; #define TRACE_BUF_SIZE 1024 @@ -262,7 +259,6 @@ extern void __ftrace_bad_type(void); do { \ IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ - IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ @@ -489,9 +485,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); -extern void trace_seq_print_cont(struct trace_seq *s, - struct trace_iterator *iter); - extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); -- cgit v1.2.3 From f0868d1e23a8efec33beb3aa688aab7fdb1ae093 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 23 Dec 2008 23:24:12 -0500 Subject: ftrace: set up trace event hash infrastructure Impact: simplify/generalize/refactor trace.c The trace.c file is becoming more difficult to maintain due to the growing number of events. There is several formats that an event may be printed. This patch sets up the infrastructure of an event hash to allow for events to register how they should be printed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3a357382cce..6bd71fa1e1c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -30,7 +30,7 @@ enum trace_type { TRACE_HW_BRANCHES, TRACE_POWER, - __TRACE_LAST_TYPE + __TRACE_LAST_TYPE, }; /* @@ -484,12 +484,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); -extern int -seq_print_ip_sym(struct trace_seq *s, unsigned long ip, - unsigned long sym_flags); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); extern long ns2usecs(cycle_t nsec); extern int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); -- cgit v1.2.3 From dbd0b4b33074aa6b7832a9d9a5bd985eca5c1aa2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 28 Dec 2008 20:44:51 -0800 Subject: tracing/ftrace: provide the base infrastructure for histogram tracing Impact: extend the tracing API The goal of this patch is to normalize and make more easy the implementation of statistical (histogram) tracing. It implements a trace_stat file into the /debugfs/tracing directory where one can print a one-shot output of statistics/histogram entries. A tracer has to provide two basic iterator callbacks: stat_start() => the first entry stat_next(prev, idx) => the next one. Note that it is adapted for arrays or hash tables or lists.... since it provides a pointer to the previous entry and the current index of the iterator. These two callbacks are called to get a snapshot of the statistics at each opening of the trace_stat file because. The values are so updated between two "cat trace_stat". And the tracer is free to lock its datas during the iteration to keep consistent values. Since it is almost always interesting to sort statisticals values to address the problems by priority, this infrastructure provides a "sorting" of the stat entries too if desired. A tracer has just to provide a stat_cmp callback to compare two entries and the stat tracing infrastructure will build a sorted list of the given entries. A last callback, called stat_headers, can be implemented by a tracer to output headers on its trace. If one of these callbacks is changed on runtime, it just have to signal it to the stat tracing API by calling the init_tracer_stat() helper. Changes in V2: - Fix a memory leak if the user opens multiple times the trace_stat file without closing it. Now we always free our list before rebuilding it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6bd71fa1e1c..05fa804d1c1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -336,6 +336,21 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; + + /* + * If you change one of the following on tracing runtime, recall + * init_tracer_stat() + */ + + /* Iteration over statistic entries */ + void *(*stat_start)(void); + void *(*stat_next)(void *prev, int idx); + /* Compare two entries for sorting (optional) for stats */ + int (*stat_cmp)(void *p1, void *p2); + /* Print a stat entry */ + int (*stat_show)(struct seq_file *s, void *p); + /* Print the headers of your stat entries */ + int (*stat_headers)(struct seq_file *s); }; struct trace_seq { @@ -421,6 +436,8 @@ void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); +void init_tracer_stat(struct tracer *trace); + extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; -- cgit v1.2.3 From f7d48cbde5c0710008caeaf7dbf14f4a9b064940 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 29 Dec 2008 13:02:17 +0100 Subject: tracing/ftrace: make trace_find_cmdline() generally available Impact: build fix On !CONFIG_CONTEXT_SWITCH_TRACER trace_find_cmdline() is not defined: kernel/trace/trace_output.c: In function 'trace_ctxwake_print': kernel/trace/trace_output.c:499: error: implicit declaration of function 'trace_find_cmdline' kernel/trace/trace_output.c:499: warning: assignment makes pointer from integer without a cast Move it to the generic section in trace.h. Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 05fa804d1c1..a8b624ccd4d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -469,10 +469,10 @@ struct tracer_switch_ops { void *private; struct tracer_switch_ops *next; }; - -char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ +extern char *trace_find_cmdline(int pid); + #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func -- cgit v1.2.3 From 36994e58a48fb8f9651c7dc845a6de298aba5bfc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Dec 2008 13:42:23 -0800 Subject: tracing/kmemtrace: normalize the raw tracer event to the unified tracing API Impact: new tracer plugin This patch adapts kmemtrace raw events tracing to the unified tracing API. To enable and use this tracer, just do the following: echo kmemtrace > /debugfs/tracing/current_tracer cat /debugfs/tracing/trace You will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | type_id 1 call_site 18446744071565527833 ptr 18446612134395152256 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164672 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345164912 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 0 call_site 18446744071565636711 ptr 18446612134345165152 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1 type_id 0 call_site 18446744071566144042 ptr 18446612134346191680 bytes_req 1304 bytes_alloc 1312 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1 type_id 1 call_site 18446744071565585534 ptr 18446612134405955584 That was to stay backward compatible with the format output produced in inux/tracepoint.h. This is the default ouput, but note that I tried something else. If you change an option: echo kmem_minimalistic > /debugfs/trace_options and then cat /debugfs/trace, you will have the following output: # tracer: kmemtrace # # # ALLOC TYPE REQ GIVEN FLAGS POINTER NODE CALLER # FREE | | | | | | | | # | - C 0xffff88007c088780 file_free_rcu + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc780 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc870 -1 d_alloc - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname + K 240 240 000000d0 0xffff8800790dc960 -1 d_alloc + K 1304 1312 000000d0 0xffff8800791d7340 -1 reiserfs_alloc_inode - C 0xffff88007cad6000 putname + K 4096 4096 000000d0 0xffff88007cad6000 -1 getname - C 0xffff88007cad6000 putname + K 992 1000 000000d0 0xffff880079045b58 -1 alloc_inode + K 768 1024 000080d0 0xffff88007c096400 -1 alloc_pipe_info + K 240 240 000000d0 0xffff8800790dca50 -1 d_alloc + K 272 320 000080d0 0xffff88007c088780 -1 get_empty_filp + K 272 320 000080d0 0xffff88007c088000 -1 get_empty_filp Yeah I shall confess kmem_minimalistic should be: kmem_alternative. Whatever, I find it more readable but this a personal opinion of course. We can drop it if you want. On the ALLOC/FREE column, + means an allocation and - a free. On the type column, you have K = kmalloc, C = cache, P = page I would like the flags to be GFP_* strings but that would not be easy to not break the column with strings.... About the node...it seems to always be -1. I don't know why but that shouldn't be difficult to find. I moved linux/tracepoint.h to trace/tracepoint.h as well. I think that would be more easy to find the tracer headers if they are all in their common directory. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cc7a4f86403..534505bb39b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,6 +9,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -29,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_KMEM_ALLOC, + TRACE_KMEM_FREE, TRACE_POWER, __TRACE_LAST_TYPE @@ -170,6 +173,24 @@ struct trace_power { struct power_trace state_data; }; +struct kmemtrace_alloc_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; + size_t bytes_req; + size_t bytes_alloc; + gfp_t gfp_flags; + int node; +}; + +struct kmemtrace_free_entry { + struct trace_entry ent; + enum kmemtrace_type_id type_id; + unsigned long call_site; + const void *ptr; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -280,6 +301,10 @@ extern void __ftrace_bad_type(void); TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ + TRACE_KMEM_ALLOC); \ + IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ + TRACE_KMEM_FREE); \ __ftrace_bad_type(); \ } while (0) -- cgit v1.2.3 From 034939b65ad5ff64b9709210b3469a95153c51a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Jan 2009 10:03:56 -0800 Subject: tracing/ftrace: handle more than one stat file per tracer Impact: new API for tracers Make the stat tracing API reentrant. And also provide the new directory /debugfs/tracing/trace_stat which will contain all the stat files for the current active tracer. Now a tracer will, if desired, want to provide a zero terminated array of tracer_stat structures. Each one contains the callbacks necessary for one stat file. It have to provide at least a name for its stat file, an iterator with stat_start/start_next callback and an output callback for one stat entry. Also adapt the branch tracer to this new API. We create two files "all" and "annotated" inside the /debugfs/tracing/trace_stat directory, making the both stats simultaneously available instead of needing to change an option to switch from one stat file to another. The output of these stats haven't changed. Changes in v2: _ Apply the previous memory leak fix (rebase against tip/master) Changes in v3: _ Merge the patch that adapted the branch tracer to this Api in this patch to not break the kernel build. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 94ed45e93a8..b3f9ad1b4d8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -334,6 +334,25 @@ struct tracer_flags { /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b +/* + * If you want to provide a stat file (one-shot statistics), fill + * an iterator with stat_start/stat_next and a stat_show callbacks. + * The others callbacks are optional. + */ +struct tracer_stat { + /* The name of your stat file */ + const char *name; + /* Iteration over statistic entries */ + void *(*stat_start)(void); + void *(*stat_next)(void *prev, int idx); + /* Compare two entries for sorting (optional) for stats */ + int (*stat_cmp)(void *p1, void *p2); + /* Print a stat entry */ + int (*stat_show)(struct seq_file *s, void *p); + /* Print the headers of your stat entries */ + int (*stat_headers)(struct seq_file *s); +}; + /* * A specific tracer, represented by methods that operate on a trace array: */ @@ -361,21 +380,7 @@ struct tracer { struct tracer *next; int print_max; struct tracer_flags *flags; - - /* - * If you change one of the following on tracing runtime, recall - * init_tracer_stat() - */ - - /* Iteration over statistic entries */ - void *(*stat_start)(void); - void *(*stat_next)(void *prev, int idx); - /* Compare two entries for sorting (optional) for stats */ - int (*stat_cmp)(void *p1, void *p2); - /* Print a stat entry */ - int (*stat_show)(struct seq_file *s, void *p); - /* Print the headers of your stat entries */ - int (*stat_headers)(struct seq_file *s); + struct tracer_stat *stats; }; struct trace_seq { -- cgit v1.2.3 From 002bb86d8d42f18937aef396c3ecd65c7e02e21a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 10 Jan 2009 11:34:13 -0800 Subject: tracing/ftrace: separate events tracing and stats tracing engine Impact: tracing's Api change Currently, the stat tracing depends on the events tracing. When you switch to a new tracer, the stats files of the previous tracer will disappear. But it's more scalable to separate those two engines. This way, we can keep the stat files of one or several tracers when we want, without bothering of multiple tracer stat files or tracer switching. To build/destroys its stats files, a tracer just have to call register_stat_tracer/unregister_stat_tracer everytimes it wants to. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b3f9ad1b4d8..79c872100dd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -334,24 +334,6 @@ struct tracer_flags { /* Makes more easy to define a tracer opt */ #define TRACER_OPT(s, b) .name = #s, .bit = b -/* - * If you want to provide a stat file (one-shot statistics), fill - * an iterator with stat_start/stat_next and a stat_show callbacks. - * The others callbacks are optional. - */ -struct tracer_stat { - /* The name of your stat file */ - const char *name; - /* Iteration over statistic entries */ - void *(*stat_start)(void); - void *(*stat_next)(void *prev, int idx); - /* Compare two entries for sorting (optional) for stats */ - int (*stat_cmp)(void *p1, void *p2); - /* Print a stat entry */ - int (*stat_show)(struct seq_file *s, void *p); - /* Print the headers of your stat entries */ - int (*stat_headers)(struct seq_file *s); -}; /* * A specific tracer, represented by methods that operate on a trace array: @@ -466,8 +448,6 @@ void tracing_start_sched_switch_record(void); int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); -void init_tracer_stat(struct tracer *trace); - extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_max_latency; -- cgit v1.2.3 From 5361499101306cfb776c3cfa0f69d0479bc63868 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 19:12:40 -0500 Subject: ftrace: add stack trace to function tracer Impact: new feature to stack trace any function Chris Mason asked about being able to pick and choose a function and get a stack trace from it. This feature enables his request. # echo io_schedule > /debug/tracing/set_ftrace_filter # echo function > /debug/tracing/current_tracer # echo func_stack_trace > /debug/tracing/trace_options Produces the following in /debug/tracing/trace: kjournald-702 [001] 135.673060: io_schedule <-sync_buffer kjournald-702 [002] 135.673671: <= sync_buffer <= __wait_on_bit <= out_of_line_wait_on_bit <= __wait_on_buffer <= sync_dirty_buffer <= journal_commit_transaction <= kjournald Note, be careful about turning this on without filtering the functions. You may find that you have a 10 second lag between typing and seeing what you typed. This is why the stack trace for the function tracer does not use the same stack_trace flag as the other tracers use. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 79c872100dd..bf39a369e4b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -457,6 +457,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); +void __trace_stack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, + int skip, int pc); + extern cycle_t ftrace_now(int cpu); #ifdef CONFIG_FUNCTION_TRACER @@ -467,6 +472,8 @@ void tracing_stop_function_trace(void); # define tracing_stop_function_trace() do { } while (0) #endif +extern int ftrace_function_enabled; + #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void (*tracer_switch_func_t)(void *private, -- cgit v1.2.3 From a225cdd263f340c864febb1992802fb5b08bc328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:06:03 -0500 Subject: ftrace: remove static from function tracer functions Impact: clean up After reorganizing the functions in trace.c and trace_function.c, they no longer need to be in global context. This patch makes the functions and one variable into static. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index bf39a369e4b..54b72781e92 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -464,16 +464,6 @@ void __trace_stack(struct trace_array *tr, extern cycle_t ftrace_now(int cpu); -#ifdef CONFIG_FUNCTION_TRACER -void tracing_start_function_trace(void); -void tracing_stop_function_trace(void); -#else -# define tracing_start_function_trace() do { } while (0) -# define tracing_stop_function_trace() do { } while (0) -#endif - -extern int ftrace_function_enabled; - #ifdef CONFIG_CONTEXT_SWITCH_TRACER typedef void (*tracer_switch_func_t)(void *private, -- cgit v1.2.3 From b1818748b0cf9427e48acf9713295e829a0d715f Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:31:01 +0100 Subject: x86, ftrace, hw-branch-tracer: dump trace on oops Dump the branch trace on an oops (based on ftrace_dump_on_oops). Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54b72781e92..b96037d970d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,7 +438,6 @@ void trace_function(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); -- cgit v1.2.3 From c71a896154119f4ca9e89d6078f5f63ad60ef199 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 23 Jan 2009 12:06:27 -0200 Subject: blktrace: add ftrace plugin Impact: New way of using the blktrace infrastructure This drops the requirement of userspace utilities to use the blktrace facility. Configuration is done thru sysfs, adding a "trace" directory to the partition directory where blktrace can be enabled for the associated request_queue. The same filters present in the IOCTL interface are present as sysfs device attributes. The /sys/block/sdX/sdXN/trace/enable file allows tracing without any filters. The other files in this directory: pid, act_mask, start_lba and end_lba can be used with the same meaning as with the IOCTL interface. Using the sysfs interface will only setup the request_queue->blk_trace fields, tracing will only take place when the "blk" tracer is selected via the ftrace interface, as in the following example: To see the trace, one can use the /d/tracing/trace file or the /d/tracign/trace_pipe file, with semantics defined in the ftrace documentation in Documentation/ftrace.txt. [root@f10-1 ~]# cat /t/trace kjournald-305 [000] 3046.491224: 8,1 A WBS 6367 + 8 <- (8,1) 6304 kjournald-305 [000] 3046.491227: 8,1 Q R 6367 + 8 [kjournald] kjournald-305 [000] 3046.491236: 8,1 G RB 6367 + 8 [kjournald] kjournald-305 [000] 3046.491239: 8,1 P NS [kjournald] kjournald-305 [000] 3046.491242: 8,1 I RBS 6367 + 8 [kjournald] kjournald-305 [000] 3046.491251: 8,1 D WB 6367 + 8 [kjournald] kjournald-305 [000] 3046.491610: 8,1 U WS [kjournald] 1 -0 [000] 3046.511914: 8,1 C RS 6367 + 8 [6367] [root@f10-1 ~]# The default line context (prefix) format is the one described in the ftrace documentation, with the blktrace specific bits using its existing format, described in blkparse(8). If one wants to have the classic blktrace formatting, this is possible by using: [root@f10-1 ~]# echo blk_classic > /t/trace_options [root@f10-1 ~]# cat /t/trace 8,1 0 3046.491224 305 A WBS 6367 + 8 <- (8,1) 6304 8,1 0 3046.491227 305 Q R 6367 + 8 [kjournald] 8,1 0 3046.491236 305 G RB 6367 + 8 [kjournald] 8,1 0 3046.491239 305 P NS [kjournald] 8,1 0 3046.491242 305 I RBS 6367 + 8 [kjournald] 8,1 0 3046.491251 305 D WB 6367 + 8 [kjournald] 8,1 0 3046.491610 305 U WS [kjournald] 1 8,1 0 3046.511914 0 C RS 6367 + 8 [6367] [root@f10-1 ~]# Using the ftrace standard format allows more flexibility, such as the ability of asking for backtraces via trace_options: [root@f10-1 ~]# echo noblk_classic > /t/trace_options [root@f10-1 ~]# echo stacktrace > /t/trace_options [root@f10-1 ~]# cat /t/trace kjournald-305 [000] 3318.826779: 8,1 A WBS 6375 + 8 <- (8,1) 6312 kjournald-305 [000] 3318.826782: <= submit_bio <= submit_bh <= sync_dirty_buffer <= journal_commit_transaction <= kjournald <= kthread <= child_rip kjournald-305 [000] 3318.826836: 8,1 Q R 6375 + 8 [kjournald] kjournald-305 [000] 3318.826837: <= generic_make_request <= submit_bio <= submit_bh <= sync_dirty_buffer <= journal_commit_transaction <= kjournald <= kthread Please read the ftrace documentation to use aditional, standardized tracing filters such as /d/tracing/trace_cpumask, etc. See also /d/tracing/trace_mark to add comments in the trace stream, that is equivalent to the /d/block/sdaN/msg interface. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b96037d970d..e603a291134 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -32,6 +32,7 @@ enum trace_type { TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, + TRACE_BLK, __TRACE_LAST_TYPE, }; -- cgit v1.2.3 From c4a8e8be2d43cc22b371e8e9c05c253409759d94 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 2 Feb 2009 20:29:21 -0200 Subject: trace: better manage the context info for events Impact: make trace_event more convenient for tracers All tracers (for the moment) that use the struct trace_event want to have the context info printed before their own output: the pid/cmdline, cpu, and timestamp. But some other tracers that want to implement their trace_event callbacks will not necessary need these information or they may want to format them as they want. This patch adds a new default-enabled trace option: TRACE_ITER_CONTEXT_INFO When disabled through: echo nocontext-info > /debugfs/tracing/trace_options The pid, cpu and timestamps headers will not be printed. IE with the sched_switch tracer with context-info (default): bash-2935 [001] 100.356561: 2935:120:S ==> [001] 0:140:R -0 [000] 100.412804: 0:140:R + [000] 11:115:S events/0 -0 [000] 100.412816: 0:140:R ==> [000] 11:115:R events/0 events/0-11 [000] 100.412829: 11:115:S ==> [000] 0:140:R Without context-info: 2935:120:S ==> [001] 0:140:R 0:140:R + [000] 11:115:S events/0 0:140:R ==> [000] 11:115:R events/0 11:115:S ==> [000] 0:140:R A tracer can disable it at runtime by clearing the bit TRACE_ITER_CONTEXT_INFO in trace_flags. The print routines were renamed to trace_print_context and trace_print_lat_context, so that they can be used by tracers if they want to use them for one of the trace_event callbacks. Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e603a291134..f0c7a0f08ca 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -405,6 +405,10 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); + +struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, + int *ent_cpu, u64 *ent_ts); + void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); @@ -591,7 +595,8 @@ enum trace_iterator_flags { TRACE_ITER_ANNOTATE = 0x2000, TRACE_ITER_USERSTACKTRACE = 0x4000, TRACE_ITER_SYM_USEROBJ = 0x8000, - TRACE_ITER_PRINTK_MSGONLY = 0x10000 + TRACE_ITER_PRINTK_MSGONLY = 0x10000, + TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */ }; /* -- cgit v1.2.3 From 7be421510b91491d5aa5a29fa1005712039b95af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 01:13:37 -0500 Subject: trace: Remove unused trace_array_cpu parameter Impact: cleanup Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f0c7a0f08ca..df627a94869 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -419,14 +419,12 @@ void ftrace(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *cur, unsigned long flags, int pc); @@ -436,7 +434,6 @@ void trace_special(struct trace_array *tr, unsigned long arg2, unsigned long arg3, int pc); void trace_function(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); @@ -462,7 +459,6 @@ void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc); -- cgit v1.2.3 From 51a763dd84253bab1d0a1e68e11a7753d1b702ca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 16:14:13 -0200 Subject: tracing: Introduce trace_buffer_{lock_reserve,unlock_commit} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: new API These new functions do what previously was being open coded, reducing the number of details ftrace plugin writers have to worry about. It also standardizes the handling of stacktrace, userstacktrace and other trace options we may introduce in the future. With this patch, for instance, the blk tracer (and some others already in the tree) can use the "userstacktrace" /d/tracing/trace_options facility. $ codiff /tmp/vmlinux.before /tmp/vmlinux.after linux-2.6-tip/kernel/trace/trace.c: trace_vprintk | -5 trace_graph_return | -22 trace_graph_entry | -26 trace_function | -45 __ftrace_trace_stack | -27 ftrace_trace_userstack | -29 tracing_sched_switch_trace | -66 tracing_stop | +1 trace_seq_to_user | -1 ftrace_trace_special | -63 ftrace_special | +1 tracing_sched_wakeup_trace | -70 tracing_reset_online_cpus | -1 13 functions changed, 2 bytes added, 355 bytes removed, diff: -353 linux-2.6-tip/block/blktrace.c: __blk_add_trace | -58 1 function changed, 58 bytes removed, diff: -58 linux-2.6-tip/kernel/trace/trace.c: trace_buffer_lock_reserve | +88 trace_buffer_unlock_commit | +86 2 functions changed, 174 bytes added, diff: +174 /tmp/vmlinux.after: 16 functions changed, 176 bytes added, 413 bytes removed, diff: -237 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index df627a94869..e03f157c772 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -403,6 +403,17 @@ int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); +struct ring_buffer_event; + +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + unsigned char type, + unsigned long len, + unsigned long flags, + int pc); +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); -- cgit v1.2.3 From b6f11df26fdc28324cf9c9e3b77f2dc985c1bb13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 5 Feb 2009 18:02:00 -0200 Subject: trace: Call tracing_reset_online_cpus before tracer->init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup To make it easy for ftrace plugin writers, as this was open coded in the existing plugins Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e03f157c772..f2742fb1575 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,7 @@ struct trace_iterator { cpumask_var_t started; }; +int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); -- cgit v1.2.3 From 1830b52d0de8c60c4f5dfbac134aa8f69d815801 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2009 19:38:43 -0500 Subject: trace: remove deprecated entry->cpu Impact: fix to prevent developers from using entry->cpu With the new ring buffer infrastructure, the cpu for the entry is implicit with which CPU buffer it is on. The original code use to record the current cpu into the generic entry header, which can be retrieved by entry->cpu. When the ring buffer was introduced, the users were convert to use the the cpu number of which cpu ring buffer was in use (this was passed to the tracers by the iterator: iter->cpu). Unfortunately, the cpu item in the entry structure was never removed. This allowed for developers to use it instead of the proper iter->cpu, unknowingly, using an uninitialized variable. This was not the fault of the developers, since it would seem like the logical place to retrieve the cpu identifier. This patch removes the cpu item from the entry structure and fixes all the users that should have been using iter->cpu. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f0c7a0f08ca..5efc4c707f7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -45,7 +45,6 @@ enum trace_type { */ struct trace_entry { unsigned char type; - unsigned char cpu; unsigned char flags; unsigned char preempt_count; int pid; -- cgit v1.2.3 From 57794a9d48b63e34acbe63282628c9f029603308 Mon Sep 17 00:00:00 2001 From: Wenji Huang Date: Fri, 6 Feb 2009 17:33:27 +0800 Subject: trace: trivial fixes in comment typos. Impact: clean up Fixed several typos in the comments. Signed-off-by: Wenji Huang Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5efc4c707f7..f92aba52a89 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -616,12 +616,12 @@ extern struct tracer nop_trace; * preempt_enable (after a disable), a schedule might take place * causing an infinite recursion. * - * To prevent this, we read the need_recshed flag before + * To prevent this, we read the need_resched flag before * disabling preemption. When we want to enable preemption we * check the flag, if it is set, then we call preempt_enable_no_resched. * Otherwise, we call preempt_enable. * - * The rational for doing the above is that if need resched is set + * The rational for doing the above is that if need_resched is set * and we have yet to reschedule, we are either in an atomic location * (where we do not need to check for scheduling) or we are inside * the scheduler and do not want to resched. @@ -642,7 +642,7 @@ static inline int ftrace_preempt_disable(void) * * This is a scheduler safe way to enable preemption and not miss * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we were either inside an atomic or + * If resched is set, then we are either inside an atomic or * are inside the scheduler (we would have already scheduled * otherwise). In this case, we do not want to call normal * preempt_enable, but preempt_enable_no_resched instead. -- cgit v1.2.3 From 7447dce96f2233d250bc39a4a10a42f7c3dd46fc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Feb 2009 21:33:57 +0100 Subject: tracing/function-graph-tracer: provide a selftest for the function graph tracer Making it more easy to do a basic regression test for this tracer. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b9838f4a692..a011ec06222 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -500,6 +500,8 @@ extern int DYN_FTRACE_TEST_NAME(void); #ifdef CONFIG_FTRACE_STARTUP_TEST extern int trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_function_graph(struct tracer *trace, + struct trace_array *tr); extern int trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_preemptoff(struct tracer *trace, -- cgit v1.2.3 From 1292211058aaf872eeb2a0e2677d237916b4501f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Feb 2009 22:16:12 +0100 Subject: tracing/power: move the power trace headers to a dedicated file Impact: cleanup Move the power tracer headers to trace/power.h to keep ftrace.h and power bits more easy to maintain as separated topics. Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a011ec06222..1ecfb9d2b36 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -10,6 +10,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, -- cgit v1.2.3 From b91facc367366b3f71375f337eb5997ec9ab4e69 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Feb 2009 18:30:44 +0100 Subject: tracing/function-graph-tracer: handle the leaf functions from trace_pipe When one cats the trace file, the leaf functions are printed without brackets: function(); whereas in the trace_pipe file we'll see the following: function() { } This is because the ring_buffer handling is not the same between those two files. On the trace file, when an entry is printed, the iterator advanced and then we can check the next entry. There is no iterator with trace_pipe, the current entry to print has been peeked and not consumed. So checking the next entry will still return the current one while we don't consume it. This patch introduces a new value for the output callbacks to ask the tracing core to not consume the current entry after printing it. We need it because we will have to consume the current entry ourself to check the next one. Now the trace_pipe is able to handle well the leaf functions. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1ecfb9d2b36..7b0518adf6d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -63,13 +63,13 @@ struct ftrace_entry { /* Function call entry */ struct ftrace_graph_ent_entry { - struct trace_entry ent; + struct trace_entry ent; struct ftrace_graph_ent graph_ent; }; /* Function return entry */ struct ftrace_graph_ret_entry { - struct trace_entry ent; + struct trace_entry ent; struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -309,7 +309,8 @@ extern void __ftrace_bad_type(void); enum print_line_t { TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; -- cgit v1.2.3 From 3c56819b14b00dd449bd776303e61f8532fad09f Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Mon, 9 Feb 2009 08:15:56 +0200 Subject: tracing: splice support for tracing_pipe Added and implemented tracing_pipe_fops->splice_read(). This allows userspace programs to get tracing data more efficiently. Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7b0518adf6d..dbff0207b21 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -353,6 +353,12 @@ struct tracer { ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos); + ssize_t (*splice_read)(struct trace_iterator *iter, + struct file *filp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags); #ifdef CONFIG_FTRACE_STARTUP_TEST int (*selftest)(struct tracer *trace, struct trace_array *tr); -- cgit v1.2.3 From 6eaaa5d57e76c454479833fc8594cd7c3b75c789 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Feb 2009 02:25:00 +0100 Subject: tracing/core: use appropriate waiting on trace_pipe Impact: api and pipe waiting change Currently, the waiting used in tracing_read_pipe() is done through a 100 msecs schedule_timeout() loop which periodically check if there are traces on the buffer. This can cause small latencies for programs which are reading the incoming events. This patch makes the reader waiting for the trace_wait waitqueue except for few tracers such as the sched and functions tracers which might be already hold the runqueue lock while waking up the reader. This is performed through a new callback wait_pipe() on struct tracer. If none is implemented on a specific tracer, the default waiting for trace_wait queue is attached. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dbff0207b21..eed732c151f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -337,18 +337,34 @@ struct tracer_flags { #define TRACER_OPT(s, b) .name = #s, .bit = b -/* - * A specific tracer, represented by methods that operate on a trace array: +/** + * struct tracer - a specific tracer and its callbacks to interact with debugfs + * @name: the name chosen to select it on the available_tracers file + * @init: called when one switches to this tracer (echo name > current_tracer) + * @reset: called when one switches to another tracer + * @start: called when tracing is unpaused (echo 1 > tracing_enabled) + * @stop: called when tracing is paused (echo 0 > tracing_enabled) + * @open: called when the trace file is opened + * @pipe_open: called when the trace_pipe file is opened + * @wait_pipe: override how the user waits for traces on trace_pipe + * @close: called when the trace file is released + * @read: override the default read callback on trace_pipe + * @splice_read: override the default splice_read callback on trace_pipe + * @selftest: selftest to run on boot (see trace_selftest.c) + * @print_headers: override the first lines that describe your columns + * @print_line: callback that prints a trace + * @set_flag: signals one of your private flags changed (trace_options file) + * @flags: your private flags */ struct tracer { const char *name; - /* Your tracer should raise a warning if init fails */ int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); + void (*wait_pipe)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, struct file *filp, char __user *ubuf, @@ -432,6 +448,9 @@ void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, int pc); +void default_wait_pipe(struct trace_iterator *iter); +void poll_wait_pipe(struct trace_iterator *iter); + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, -- cgit v1.2.3 From b04cc6b1f6398b0e0b60d37e27ce51b4899672ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Feb 2009 03:22:28 +0100 Subject: tracing/core: introduce per cpu tracing files Impact: split up tracing output per cpu Currently, on the tracing debugfs directory, three files are available to the user to let him extracting the trace output: - trace is an iterator through the ring-buffer. It's a reader but not a consumer It doesn't block when no more traces are available. - trace pretty similar to the former, except that it adds more informations such as prempt count, irq flag, ... - trace_pipe is a reader and a consumer, it will also block waiting for traces if necessary (heh, yes it's a pipe). The traces coming from different cpus are curretly mixed up inside these files. Sometimes it messes up the informations, sometimes it's useful, depending on what does the tracer capture. The tracing_cpumask file is useful to filter the output and select only the traces captured a custom defined set of cpus. But still it is not enough powerful to extract at the same time one trace buffer per cpu. So this patch creates a new directory: /debug/tracing/per_cpu/. Inside this directory, you will now find one trace_pipe file and one trace file per cpu. Which means if you have two cpus, you will have: trace0 trace1 trace_pipe0 trace_pipe1 And of course, reading these files will have the same effect than with the usual tracing files, except that you will only see the traces from the given cpu. The original all-in-one cpu trace file are still available on their original place. Until now, only one consumer was allowed on trace_pipe to avoid racy consuming on the ring-buffer. Now the approach changed a bit, you can have only one consumer per cpu. Which means you are allowed to read concurrently trace_pipe0 and trace_pipe1 But you can't have two readers on trace_pipe0 or trace_pipe1. Following the same logic, if there is one reader on the common trace_pipe, you can not have at the same time another reader on trace_pipe0 or in trace_pipe1. Because in trace_pipe is already a consumer in all cpu buffers in essence. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index eed732c151f..508235a39da 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,8 @@ struct trace_seq { unsigned int readpos; }; +#define TRACE_PIPE_ALL_CPU -1 + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: @@ -404,6 +406,7 @@ struct trace_iterator { struct tracer *trace; void *private; struct ring_buffer_iter *buffer_iter[NR_CPUS]; + int cpu_file; /* The below is zeroed out in pipe_read */ struct trace_seq seq; -- cgit v1.2.3 From d7350c3f45694104e820041969c8185c5f99e57c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Feb 2009 06:13:16 +0100 Subject: tracing/core: make the read callbacks reentrants Now that several per-cpu files can be read or spliced at the same, we want the read/splice callbacks for tracing files to be reentrants. Until now, a single global mutex (trace_types_lock) serialized the access to tracing_read_pipe(), tracing_splice_read_pipe(), and the seq helpers. Ie: it means that if a user tries to read trace_pipe0 and trace_pipe1 at the same time, the access to the function tracing_read_pipe() is contended and one reader must wait for the other to finish its read call. The trace_type_lock mutex is mostly here to serialize the access to the global current tracer (current_trace), which can be changed concurrently. Although the iter struct keeps a private pointer to this tracer, its callbacks can be changed by another function. The method used here is to not keep anymore private reference to the tracer inside the iterator but to make a copy of it inside the iterator. Then it checks on subsequents read calls if the tracer has changed. This is not costly because the current tracer is not expected to be changed often, so we use a branch prediction for that. Moreover, we add a private mutex to the iterator (there is one iterator per file descriptor) to serialize the accesses in case of multiple consumers per file descriptor (which would be a silly idea from the user). Note that this is not to protect the ring buffer, since the ring buffer already serializes the readers accesses. This is to prevent from traces weirdness in case of concurrent consumers. But these mutexes can be dropped anyway, that would not result in any crash. Just tell me what you think about it. Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 508235a39da..632191770aa 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -405,8 +405,9 @@ struct trace_iterator { struct trace_array *tr; struct tracer *trace; void *private; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; /* The below is zeroed out in pipe_read */ struct trace_seq seq; -- cgit v1.2.3 From ef5580d0fffce6e0a01043bac0625128b5d409a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 19:38:04 -0500 Subject: tracing: add interface to write into current tracer buffer Right now all tracers must manage their own trace buffers. This was to enforce tracers to be independent in case we finally decide to allow each tracer to have their own trace buffer. But now we are adding event tracing that writes to the current tracer's buffer. This adds an interface to allow events to write to the current tracer buffer without having to manage its own. Since event tracing has no "tracer", and is just a way to hook into any other tracer. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 632191770aa..adf161f6dd1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -442,6 +442,12 @@ void trace_buffer_unlock_commit(struct trace_array *tr, struct ring_buffer_event *event, unsigned long flags, int pc); +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); -- cgit v1.2.3 From c32e827b25054cb17b79cf97fb5e63ae4ce2223c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Feb 2009 19:12:30 -0500 Subject: tracing: add raw trace point recording infrastructure Impact: lower overhead tracing The current event tracer can automatically pick up trace points that are registered with the TRACE_FORMAT macro. But it required a printf format string and parsing. Although, this adds the ability to get guaranteed information like task names and such, it took a hit in overhead processing. This processing can add about 500-1000 nanoseconds overhead, but in some cases that too is considered too much and we want to shave off as much from this overhead as possible. Tom Zanussi recently posted tracing patches to lkml that are based on a nice idea about capturing the data via C structs using STRUCT_ENTER, STRUCT_EXIT type of macros. I liked that method very much, but did not like the implementation that required a developer to add data/code in several disjoint locations. This patch extends the event_tracer macros to do a similar "raw C" approach that Tom Zanussi did. But instead of having the developers needing to tweak a bunch of code all over the place, they can do it all in one macro - preferably placed near the code that it is tracing. That makes it much more likely that tracepoints will be maintained on an ongoing basis by the code they modify. The new macro TRACE_EVENT_FORMAT is created for this approach. (Note, a developer may still utilize the more low level DECLARE_TRACE macros if they don't care about getting their traces automatically in the event tracer.) They can also use the existing TRACE_FORMAT if they don't need to code the tracepoint in C, but just want to use the convenience of printf. So if the developer wants to "hardwire" a tracepoint in the fastest possible way, and wants to acquire their data via a user space utility in a raw binary format, or wants to see it in the trace output but not sacrifice any performance, then they can implement the faster but more complex TRACE_EVENT_FORMAT macro. Here's what usage looks like: TRACE_EVENT_FORMAT(name, TPPROTO(proto), TPARGS(args), TPFMT(fmt, fmt_args), TRACE_STUCT( TRACE_FIELD(type1, item1, assign1) TRACE_FIELD(type2, item2, assign2) [...] ), TPRAWFMT(raw_fmt) ); Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT uses. name: is the unique identifier of the trace point proto: The proto type that the trace point uses args: the args in the proto type fmt: printf format to use with the event printf tracer fmt_args: the printf argments to match fmt TRACE_STRUCT starts the ability to create a structure. Each item in the structure is defined with a TRACE_FIELD TRACE_FIELD(type, item, assign) type: the C type of item. item: the name of the item in the stucture assign: what to assign the item in the trace point callback raw_fmt is a way to pretty print the struct. It must match the order of the items are added in TRACE_STUCT An example of this would be: TRACE_EVENT_FORMAT(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p, int success), TPARGS(rq, p, success), TPFMT("task %s:%d %s", p->comm, p->pid, success?"succeeded":"failed"), TRACE_STRUCT( TRACE_FIELD(pid_t, pid, p->pid) TRACE_FIELD(int, success, success) ), TPRAWFMT("task %d success=%d") ); This creates us a unique struct of: struct { pid_t pid; int success; }; And the way the call back would assign these values would be: entry->pid = p->pid; entry->success = success; The nice part about this is that the creation of the assignent is done via macro magic in the event tracer. Once the TRACE_EVENT_FORMAT is created, the developer will then have a faster method to record into the ring buffer. They do not need to worry about the tracer itself. The developer would only need to touch the files in include/trace/*.h Again, I would like to give special thanks to Tom Zanussi for this nice idea. Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index adf161f6dd1..aa1ab0cb80a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,4 +726,23 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +struct ftrace_event_call { + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + struct dentry *raw_dir; + int raw_enabled; + int (*raw_init)(void); + int (*raw_reg)(void); + void (*raw_unreg)(void); +}; + +void event_trace_printk(unsigned long ip, const char *fmt, ...); +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; + #endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v1.2.3 From fd99498989f3b3feeab89dcadf537138ba136d24 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 28 Feb 2009 02:41:25 -0500 Subject: tracing: add raw fast tracing interface for trace events This patch adds the interface to enable the C style trace points. In the directory /debugfs/tracing/events/subsystem/event We now have three files: enable : values 0 or 1 to enable or disable the trace event. available_types: values 'raw' and 'printf' which indicate the tracing types available for the trace point. If a developer does not use the TRACE_EVENT_FORMAT macro and just uses the TRACE_FORMAT macro, then only 'printf' will be available. This file is read only. type: values 'raw' or 'printf'. This indicates which type of tracing is active for that trace point. 'printf' is the default and if 'raw' is not available, this file is read only. # echo raw > /debug/tracing/events/sched/sched_wakeup/type # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable Will enable the C style tracing for the sched_wakeup trace point. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index aa1ab0cb80a..f6fa0b9f83a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -726,6 +726,12 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* trace event type bit fields, not numeric */ +enum { + TRACE_EVENT_TYPE_PRINTF = 1, + TRACE_EVENT_TYPE_RAW = 2, +}; + struct ftrace_event_call { char *name; char *system; @@ -736,6 +742,7 @@ struct ftrace_event_call { int id; struct dentry *raw_dir; int raw_enabled; + int type; int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); -- cgit v1.2.3 From f9520750c4c9924c14325cd951efae5fae58104c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 14:04:40 -0500 Subject: tracing: make trace_seq_reset global and rename to trace_seq_init Impact: clean up The trace_seq functions may be used separately outside of the ftrace iterator. The trace_seq_reset is needed for these operations. This patch also renames trace_seq_reset to the more appropriate trace_seq_init. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f6fa0b9f83a..cf6ba4181b1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,14 @@ struct trace_seq { unsigned int readpos; }; +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + + #define TRACE_PIPE_ALL_CPU -1 /* -- cgit v1.2.3 From 981d081ec8b958b7d962ee40d433581a55d40fc5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 13:53:59 -0500 Subject: tracing: add format file to describe event struct fields This patch adds the "format" file to the trace point event directory. This is based off of work by Tom Zanussi, in which a file is exported to be tread from user land such that a user space app may read the binary record stored in the ring buffer. # cat /debug/tracing/events/sched/sched_switch/format field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cf6ba4181b1..e606633fb49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -754,6 +754,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); + int (*show_format)(struct trace_seq *s); }; void event_trace_printk(unsigned long ip, const char *fmt, ...); -- cgit v1.2.3 From 2cadf9135eb3b6d84b6427314be827ddd443c308 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 1 Dec 2008 22:20:19 -0500 Subject: tracing: add binary buffer files for use with splice Impact: new feature This patch creates a directory of files that correspond to the per CPU ring buffers. These are binary files and are made to be used with splice. This is the fastest way to extract data from the ftrace ring buffers. Thanks to Jiaying Zhang for pushing me to get this code fixed, and to Eduard - Gabriel Munteanu for his splice code that helped me debug my code. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e606633fb49..561bb5c5d98 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -217,6 +217,7 @@ enum trace_flag_type { */ struct trace_array_cpu { atomic_t disabled; + void *buffer_page; /* ring buffer spare */ /* these fields get copied into max-trace: */ unsigned long trace_idx; -- cgit v1.2.3 From c032ef64d680717e4e8ce3da65da6419a35f8a2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Mar 2009 20:34:24 -0500 Subject: tracing: add latency output format option With the removal of the latency_trace file, we lost the ability to see some of the finer details in a trace. Like the state of interrupts enabled, the preempt count, need resched, and if we are in an interrupt handler, softirq handler or not. This patch simply creates an option to bring back the old format. This also removes the warning about an unused variable that held the latency_trace file operations. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 561bb5c5d98..12cd119cca3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -651,7 +651,8 @@ enum trace_iterator_flags { TRACE_ITER_USERSTACKTRACE = 0x4000, TRACE_ITER_SYM_USEROBJ = 0x8000, TRACE_ITER_PRINTK_MSGONLY = 0x10000, - TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */ + TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ + TRACE_ITER_LATENCY_FMT = 0x40000, }; /* -- cgit v1.2.3 From 5e1607a00bd082972629d3d68c95c8bcf902b55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 10:24:48 +0100 Subject: tracing: rename ftrace_printk() => trace_printk() Impact: cleanup Use a more generic name - this also allows the prototype to move to kernel.h and be generally available to kernel developers who want to do some quick tracing. Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 12cd119cca3..8beff03fda6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -115,7 +115,7 @@ struct userstack_entry { }; /* - * ftrace_printk entry: + * trace_printk entry: */ struct print_entry { struct trace_entry ent; -- cgit v1.2.3 From 1427cdf0592368bdec57276edaf714040ee8744f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 6 Mar 2009 17:21:47 +0100 Subject: tracing: infrastructure for supporting binary record Impact: save on memory for tracing Current tracers are typically using a struct(like struct ftrace_entry, struct ctx_switch_entry, struct special_entr etc...)to record a binary event. These structs can only record a their own kind of events. A new kind of tracer need a new struct and a lot of code too handle it. So we need a generic binary record for events. This infrastructure is for this purpose. [fweisbec@gmail.com: rebase against latest -tip, make it safe while sched tracing as reported by Steven Rostedt] Signed-off-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8beff03fda6..0f5077f8f95 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,6 +20,7 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINTK, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -124,6 +125,16 @@ struct print_entry { char buf[]; }; +struct bprintk_entry { + struct trace_entry ent; + unsigned long ip; + const char *fmt; + u32 buf[]; +}; +#ifdef CONFIG_TRACE_BPRINTK +extern int trace_bprintk_enable; +#endif + #define TRACE_OLD_SIZE 88 struct trace_field_cont { @@ -285,6 +296,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ -- cgit v1.2.3 From 769b0441f438c4bb4872cb8560eb6fe51bcc09ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Mar 2009 17:21:49 +0100 Subject: tracing/core: drop the old trace_printk() implementation in favour of trace_bprintk() Impact: faster and lighter tracing Now that we have trace_bprintk() which is faster and consume lesser memory than trace_printk() and has the same purpose, we can now drop the old implementation in favour of the binary one from trace_bprintk(), which means we move all the implementation of trace_bprintk() to trace_printk(), so the Api doesn't change except that we must now use trace_seq_bprintk() to print the TRACE_PRINT entries. Some changes result of this: - Previously, trace_bprintk depended of a single tracer and couldn't work without. This tracer has been dropped and the whole implementation of trace_printk() (like the module formats management) is now integrated in the tracing core (comes with CONFIG_TRACING), though we keep the file trace_printk (previously trace_bprintk.c) where we can find the module management. Thus we don't overflow trace.c - changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries. - change a bit trace_printk/trace_vprintk macros to support non-builtin formats constants, and fix 'const' qualifiers warnings. But this is all transparent for developers. - etc... V2: - Rebase against last changes - Fix mispell on the changelog V3: - Rebase against last changes (moving trace_printk() to kernel.h) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0f5077f8f95..6140922392c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,7 +20,6 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, - TRACE_BPRINTK, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -120,16 +119,10 @@ struct userstack_entry { */ struct print_entry { struct trace_entry ent; - unsigned long ip; + unsigned long ip; int depth; - char buf[]; -}; - -struct bprintk_entry { - struct trace_entry ent; - unsigned long ip; - const char *fmt; - u32 buf[]; + const char *fmt; + u32 buf[]; }; #ifdef CONFIG_TRACE_BPRINTK extern int trace_bprintk_enable; @@ -296,7 +289,6 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ - IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ -- cgit v1.2.3 From 9de36825b321fe9fe9cf73260554251af579f4ca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 6 Mar 2009 17:52:03 +0100 Subject: tracing: trace_bprintk() cleanups Impact: cleanup Remove a few leftovers and clean up the code a bit. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6140922392c..2bfb7d11fc1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -119,14 +119,11 @@ struct userstack_entry { */ struct print_entry { struct trace_entry ent; - unsigned long ip; + unsigned long ip; int depth; const char *fmt; - u32 buf[]; + u32 buf[]; }; -#ifdef CONFIG_TRACE_BPRINTK -extern int trace_bprintk_enable; -#endif #define TRACE_OLD_SIZE 88 @@ -199,7 +196,7 @@ struct kmemtrace_free_entry { * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: * IRQS_OFF - interrupts were disabled - * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags + * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags * NEED_RESCED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler @@ -302,7 +299,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ - IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ @@ -325,8 +322,8 @@ enum print_line_t { * flags value in struct tracer_flags. */ struct tracer_opt { - const char *name; /* Will appear on the trace_options file */ - u32 bit; /* Mask assigned in val field in tracer_flags */ + const char *name; /* Will appear on the trace_options file */ + u32 bit; /* Mask assigned in val field in tracer_flags */ }; /* @@ -335,7 +332,7 @@ struct tracer_opt { */ struct tracer_flags { u32 val; - struct tracer_opt *opts; + struct tracer_opt *opts; }; /* Makes more easy to define a tracer opt */ @@ -390,7 +387,7 @@ struct tracer { int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; int print_max; - struct tracer_flags *flags; + struct tracer_flags *flags; struct tracer_stat *stats; }; -- cgit v1.2.3 From da4d03020c2af32f73e8bfbab0a66620d85bb9bb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Mar 2009 17:14:30 -0400 Subject: tracing: new format for specialized trace points Impact: clean up and enhancement The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its ability to save data as well as to print the record out. Working with Ingo Molnar, we came up with a new format that is much more pleasing to the eye of C developers. This new macro is more C style than the old macro, and is more obvious to what it does. Here's the example. The only updated macro in this patch is the sched_switch trace point. The old method looked like this: TRACE_EVENT_FORMAT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_FMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TP_CMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d") ); The above method is hard to read and requires two format fields. The new method: /* * Tracepoint for task switches, performed by the scheduler: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ TRACE_EVENT(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TP_ARGS(rq, prev, next), TP_STRUCT__entry( __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) ), TP_printk("task %s:%d [%d] ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->next_comm, __entry->next_pid, __entry->next_prio), TP_fast_assign( memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ) ); This macro is called TRACE_EVENT, it is broken up into 5 parts: TP_PROTO: the proto type of the trace point TP_ARGS: the arguments of the trace point TP_STRUCT_entry: the structure layout of the entry in the ring buffer TP_printk: the printk format TP_fast_assign: the method used to write the entry into the ring buffer The structure is the definition of how the event will be saved in the ring buffer. The printk is used by the internal tracing in case of an oops, and the kernel needs to print out the format of the record to the console. This the TP_printk gives a means to show the records in a human readable format. It is also used to print out the data from the trace file. The TP_fast_assign is executed directly. It is basically like a C function, where the __entry is the handle to the record. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2bfb7d11fc1..c5e1d8865fe 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -751,12 +751,7 @@ struct ftrace_event_call { int (*regfunc)(void); void (*unregfunc)(void); int id; - struct dentry *raw_dir; - int raw_enabled; - int type; int (*raw_init)(void); - int (*raw_reg)(void); - void (*raw_unreg)(void); int (*show_format)(struct trace_seq *s); }; -- cgit v1.2.3 From 1852fcce181faa237c010a3dbedb473cf9d4555f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Mar 2009 14:33:00 -0400 Subject: tracing: expand the ring buffers when an event is activated To save memory, the tracer ring buffers are set to a minimum. The activating of a trace expands the ring buffer size. This patch adds this expanding, when an event is activated. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5e1d8865fe..336324d717f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -737,6 +737,9 @@ static inline void trace_branch_disable(void) } #endif /* CONFIG_BRANCH_TRACER */ +/* set ring buffers to default size if not already done so */ +int tracing_update_buffers(void); + /* trace event type bit fields, not numeric */ enum { TRACE_EVENT_TYPE_PRINTF = 1, -- cgit v1.2.3 From 48ead02030f849d011259244bb4ea9b985479006 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 12 Mar 2009 18:24:49 +0100 Subject: tracing/core: bring back raw trace_printk for dynamic formats strings Impact: fix callsites with dynamic format strings Since its new binary implementation, trace_printk() internally uses static containers for the format strings on each callsites. But the value is assigned once at build time, which means that it can't take dynamic formats. So this patch unearthes the raw trace_printk implementation for the callers that will need trace_printk to be able to carry these dynamic format strings. The trace_printk() macro will use the appropriate implementation for each callsite. Most of the time however, the binary implementation will still be used. The other impact of this patch is that mmiotrace_printk() will use the old implementation because it calls the low level trace_vprintk and we can't guess here whether the format passed in it is dynamic or not. Some parts of this patch have been written by Steven Rostedt (most notably the part that chooses the appropriate implementation for each callsites). Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 336324d717f..cede1ab49d0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -20,6 +20,7 @@ enum trace_type { TRACE_WAKE, TRACE_STACK, TRACE_PRINT, + TRACE_BPRINT, TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, @@ -117,7 +118,7 @@ struct userstack_entry { /* * trace_printk entry: */ -struct print_entry { +struct bprint_entry { struct trace_entry ent; unsigned long ip; int depth; @@ -125,6 +126,13 @@ struct print_entry { u32 buf[]; }; +struct print_entry { + struct trace_entry ent; + unsigned long ip; + int depth; + char buf[]; +}; + #define TRACE_OLD_SIZE 88 struct trace_field_cont { @@ -286,6 +294,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ + IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ @@ -570,6 +579,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern long ns2usecs(cycle_t nsec); extern int +trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args); +extern int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); extern unsigned long trace_flags; -- cgit v1.2.3 From e9fb2b6d5845e24f104713591286b6f39761c027 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 12 Mar 2009 14:19:25 -0400 Subject: tracing: have event_trace_printk use static tracer Impact: speed up on event tracing The event_trace_printk is currently a wrapper function that calls trace_vprintk. Because it uses a variable for the fmt it misses out on the optimization of using the binary printk. This patch makes event_trace_printk into a macro wrapper to use the fmt as the same as the trace_printks. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cede1ab49d0..35cfa7bbaf3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -773,4 +773,21 @@ void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; +extern const char *__start___trace_bprintk_fmt[]; +extern const char *__stop___trace_bprintk_fmt[]; + +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + #endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v1.2.3 From bdc067582b8b71c7771bab076bbc51569c594fb4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 13 Mar 2009 00:12:52 -0400 Subject: tracing: add comment for use of double __builtin_consant_p Impact: documentation The use of the double __builtin_contant_p checks in the event_trace_printk can be confusing to developers and reviewers. This patch adds a comment to explain why it is there. Requested-by: KOSAKI Motohiro LKML-Reference: <20090313122235.43EB.A69D9226@jp.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 35cfa7bbaf3..67595b8f0f1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -776,6 +776,11 @@ extern struct ftrace_event_call __stop_ftrace_events[]; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ #define event_trace_printk(ip, fmt, args...) \ do { \ __trace_printk_check_format(fmt, ##args); \ -- cgit v1.2.3 From ee08c6eccb7d1295516f7cf420fddf7b14e9146f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Mar 2009 05:52:59 +0100 Subject: tracing/ftrace: syscall tracing infrastructure, basics Provide basic callbacks to do syscall tracing. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Lai Jiangshan LKML-Reference: <1236401580-5758-2-git-send-email-fweisbec@gmail.com> [ simplified it to a trace_printk() for now. ] Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c5e1d8865fe..3d49daae47d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -30,6 +30,8 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, + TRACE_SYSCALL_ENTER, + TRACE_SYSCALL_EXIT, TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, -- cgit v1.2.3 From bed1ffca022cc876fb83161d26670e9b5d3cf36b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Mar 2009 15:42:11 +0100 Subject: tracing/syscalls: core infrastructure for syscalls tracing, enhancements Impact: new feature This adds the generic support for syscalls tracing. This is currently exploited through a devoted tracer but other tracing engines can use it. (They just have to play with {start,stop}_ftrace_syscalls() and use the display callbacks unless they want to override them.) The syscalls prototypes definitions are abused here to steal some metadata informations: - syscall name, param types, param names, number of params The syscall addr is not directly saved during this definition because we don't know if its prototype is available in the namespace. But we don't really need it. The arch has just to build a function able to resolve the syscall number to its metadata struct. The current tracer prints the syscall names, parameters names and values (and their types optionally). Currently the value is a raw hex but higher level values diplaying is on my TODO list. Signed-off-by: Frederic Weisbecker LKML-Reference: <1236955332-10133-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3d49daae47d..d80ca0d464d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -194,6 +194,19 @@ struct kmemtrace_free_entry { const void *ptr; }; +struct syscall_trace_enter { + struct trace_entry ent; + int nr; + unsigned long args[]; +}; + +struct syscall_trace_exit { + struct trace_entry ent; + int nr; + unsigned long ret; +}; + + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -306,6 +319,10 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ + IF_ASSIGN(var, ent, struct syscall_trace_enter, \ + TRACE_SYSCALL_ENTER); \ + IF_ASSIGN(var, ent, struct syscall_trace_exit, \ + TRACE_SYSCALL_EXIT); \ __ftrace_bad_type(); \ } while (0) -- cgit v1.2.3 From 4ca530852346be239b7c19e7bec5d2b78855bebe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Mar 2009 19:20:15 -0400 Subject: tracing: protect reader of cmdline output Impact: fix to one cause of incorrect comm outputs in trace The spinlock only protected the creation of a comm <=> pid pair. But it was possible that a reader could look up a pid, and get the wrong comm because it had no locking. This also required changing trace_find_cmdline to copy the comm cache and not just send back a pointer to it. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 56ce34d90b0..b0ecad8ecc3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -547,7 +547,7 @@ struct tracer_switch_ops { }; #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ -extern char *trace_find_cmdline(int pid); +extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; -- cgit v1.2.3 From af4617bdba34aa556272b34c3986b0a4d588f568 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Mar 2009 18:09:55 -0400 Subject: tracing: add global-clock option to provide cross CPU clock to traces Impact: feature to allow better serialized clock This patch adds an option called "global-clock" that will allow the tracer to switch to a slower but more accurate (across CPUs) clock. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b0ecad8ecc3..26a7a28ca11 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -667,6 +667,7 @@ enum trace_iterator_flags { TRACE_ITER_PRINTK_MSGONLY = 0x10000, TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ TRACE_ITER_LATENCY_FMT = 0x40000, + TRACE_ITER_GLOBAL_CLK = 0x80000, }; /* -- cgit v1.2.3 From 40ce74f19c28077550646c76d96a075bf312e461 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Mar 2009 14:03:53 -0400 Subject: tracing: remove recording function depth from trace_printk The function depth in trace_printk was to facilitate the function graph output. Now that the function graph calculates the depth within the trace output, we no longer need to record the depth when the trace_printk is called. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 38276d1638e..7c9a0cbf5dc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -123,7 +123,6 @@ struct userstack_entry { struct bprint_entry { struct trace_entry ent; unsigned long ip; - int depth; const char *fmt; u32 buf[]; }; @@ -131,7 +130,6 @@ struct bprint_entry { struct print_entry { struct trace_entry ent; unsigned long ip; - int depth; char buf[]; }; @@ -598,9 +596,9 @@ extern int trace_selftest_startup_branch(struct tracer *trace, extern void *head_page(struct trace_array_cpu *data); extern long ns2usecs(cycle_t nsec); extern int -trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args); +trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int -trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); +trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; -- cgit v1.2.3 From ac199db0189c091f2863312061c0575937f68810 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:15 +0100 Subject: ftrace: event profile hooks Impact: new tracing infrastructure feature Provide infrastructure to generate software perf counter events from tracepoints. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <20090319194233.557364871@chello.nl> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7c9a0cbf5dc..7cfb741be20 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -785,12 +785,23 @@ struct ftrace_event_call { int id; int (*raw_init)(void); int (*show_format)(struct trace_seq *s); + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif }; void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; +#define for_each_event(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; -- cgit v1.2.3 From cf027f645e6aee4f0ca6197a6b6a57f327fdb13f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:30:39 -0500 Subject: tracing: add run-time field descriptions for event filtering This patch makes the field descriptions defined for event tracing available at run-time, for the event-filtering mechanism introduced in a subsequent patch. The common event fields are prepended with 'common_' in the format display, allowing them to be distinguished from the other fields that might internally have same name and can therefore be unambiguously used in filters. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710639.7703.46.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be20..9288dc7ad14 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -775,16 +775,26 @@ enum { TRACE_EVENT_TYPE_RAW = 2, }; +struct ftrace_event_field { + struct list_head link; + char *name; + char *type; + int offset; + int size; +}; + struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); - int id; - int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; @@ -793,6 +803,8 @@ struct ftrace_event_call { #endif }; +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size); void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; -- cgit v1.2.3 From 7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:31:04 -0500 Subject: tracing: add per-event filtering This patch adds per-event filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each event directory. This file can be written to to set filters; reading from it will display the current set of filters set for that event. Basically, any field listed in the 'format' file for an event can be filtered on (including strings, but not yet other array types) using either matching ('==') or non-matching ('!=') 'predicates'. A 'predicate' can be either a single expression: # echo pid != 0 > filter # cat filter pid != 0 or a compound expression of up to 8 sub-expressions combined using '&&' or '||': # echo comm == Xorg > filter # echo "&& sig != 29" > filter # cat filter comm == Xorg && sig != 29 Only events having field values matching an expression will be available in the trace output; non-matching events are discarded. Note that a compound expression is built up by echoing each sub-expression separately - it's not the most efficient way to do things, but it keeps the parser simple and assumes that compound expressions will be relatively uncommon. In any case, a subsequent patch introducing a way to set filters for entire subsystems should mitigate any need to do this for lots of events. Setting a filter without an '&&' or '||' clears the previous filter completely and sets the filter to the new expression: # cat filter comm == Xorg && sig != 29 # echo comm != Xorg # cat filter comm != Xorg To clear a filter, echo 0 to the filter file: # echo 0 > filter # cat filter none The limit of 8 predicates for a compound expression is arbitrary - for efficiency, it's implemented as an array of pointers to predicates, and 8 seemed more than enough for any filter... Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710665.7703.48.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9288dc7ad14..d9eb39e4bb3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -795,6 +795,7 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; + struct filter_pred **preds; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; @@ -803,8 +804,35 @@ struct ftrace_event_call { #endif }; +#define MAX_FILTER_PRED 8 + +struct filter_pred; + +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); + +struct filter_pred { + filter_pred_fn_t fn; + u64 val; + char *str_val; + int str_len; + char *field_name; + int offset; + int not; + int or; + int compound; + int clear; +}; + int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); +extern void filter_free_pred(struct filter_pred *pred); +extern int filter_print_preds(struct filter_pred **preds, char *buf); +extern int filter_parse(char **pbuf, struct filter_pred *pred); +extern int filter_add_pred(struct ftrace_event_call *call, + struct filter_pred *pred); +extern void filter_free_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); + void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; -- cgit v1.2.3 From cfb180f3e71b2a280a254c8646a9ab1beab63f84 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 22 Mar 2009 03:31:17 -0500 Subject: tracing: add per-subsystem filtering This patch adds per-subsystem filtering to the event tracing subsystem. It adds a 'filter' debugfs file to each subsystem directory. This file can be written to to set filters; reading from it will display the current set of filters set for that subsystem. Basically what it does is propagate the filter down to each event contained in the subsystem. If a particular event doesn't have a field with the name specified in the filter, it simply doesn't get set for that event. You can verify whether or not the filter was set for a particular event by looking at the filter file for that event. As with per-event filters, compound expressions are supported, echoing '0' to the subsystem's filter file clears all filters in the subsystem, etc. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker LKML-Reference: <1237710677.7703.49.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d9eb39e4bb3..f267723c3c5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -804,6 +804,18 @@ struct ftrace_event_call { #endif }; +struct event_subsystem { + struct list_head list; + const char *name; + struct dentry *entry; + struct filter_pred **preds; +}; + +#define events_for_each(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + #define MAX_FILTER_PRED 8 struct filter_pred; @@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); extern void filter_free_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern void filter_free_subsystem_preds(struct event_subsystem *system); +extern int filter_add_subsystem_pred(struct event_subsystem *system, + struct filter_pred *pred); void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; -- cgit v1.2.3 From 07edf7121374609709ef1b0889f6e7b8d6a62ec1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Mar 2009 23:10:46 +0100 Subject: tracing/events: don't use wake up for events Impact: fix hard-lockup with sched switch events Some ftrace events, such as sched wakeup, can be traced while the runqueue lock is hold. Since they are using trace_current_buffer_unlock_commit(), they call wake_up() which can try to grab the runqueue lock too, resulting in a deadlock. Now for all event, we call a new helper: trace_nowake_buffer_unlock_commit() which do pretty the same than trace_current_buffer_unlock_commit() except than it doesn't call trace_wake_up(). Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1237759847-21025-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f267723c3c5..54fd9bcd0a6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -483,6 +483,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, unsigned long flags, int pc); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); -- cgit v1.2.3 From 4bda2d517bfa3ce3d7044e06988cdddae7adffe2 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 24 Mar 2009 02:14:31 -0500 Subject: tracing/filters: use trace_seq_printf() to print filters Impact: cleanup Instead of just using the trace_seq buffer to print the filters, use trace_seq_printf() as it was intended to be used. Reported-by: Steven Rostedt Signed-off-by: Tom Zanussi Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker LKML-Reference: <1237878871.8339.59.camel@charm-linux> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54fd9bcd0a6..90a848debcb 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -840,7 +840,8 @@ struct filter_pred { int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); extern void filter_free_pred(struct filter_pred *pred); -extern int filter_print_preds(struct filter_pred **preds, char *buf); +extern void filter_print_preds(struct filter_pred **preds, + struct trace_seq *s); extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); -- cgit v1.2.3 From be6f164a02f394675e2ac2077dd354cebef5b4c0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 11:06:24 -0400 Subject: function-graph: add option for include sleep times Impact: give user a choice to show times spent while sleeping The user may want to see the time a function spent sleeping. This patch adds the trace option "sleep-time" to allow that. The "sleep-time" option is default on. echo sleep-time > /debug/tracing/trace_options produces: ------------------------------------------ 2) avahi-d-3428 => -0 ------------------------------------------ 2) | finish_task_switch() { 2) 0.621 us | _spin_unlock_irq(); 2) 2.202 us | } 2) ! 1002.197 us | } 2) ! 1003.521 us | } where as, echo nosleep-time > /debug/tracing/trace_options produces: 0) -0 => yum-upd-3416 ------------------------------------------ 0) | finish_task_switch() { 0) 0.643 us | _spin_unlock_irq(); 0) 2.342 us | } 0) + 41.302 us | } 0) + 42.453 us | } Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7cfb741be20..d7410bbb9a8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -683,6 +683,7 @@ enum trace_iterator_flags { TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, + TRACE_ITER_SLEEP_TIME = 0x100000, }; /* -- cgit v1.2.3 From ca2b84cb3c4a0d4d2143b46ec072cdff5d1b3b87 Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Mon, 23 Mar 2009 15:12:24 +0200 Subject: kmemtrace: use tracepoints kmemtrace now uses tracepoints instead of markers. We no longer need to use format specifiers to pass arguments. Signed-off-by: Eduard - Gabriel Munteanu [ folded: Use the new TP_PROTO and TP_ARGS to fix the build. ] [ folded: fix build when CONFIG_KMEMTRACE is disabled. ] [ folded: define tracepoints when CONFIG_TRACEPOINTS is enabled. ] Signed-off-by: Pekka Enberg LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb0ce3fc36d..cbc168f1e43 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -182,6 +182,12 @@ struct trace_power { struct power_trace state_data; }; +enum kmemtrace_type_id { + KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ + KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ + KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ +}; + struct kmemtrace_alloc_entry { struct trace_entry ent; enum kmemtrace_type_id type_id; -- cgit v1.2.3 From cf8e3474654f20433aab9aa35826d43b5f245008 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 30 Mar 2009 13:48:00 +0800 Subject: tracing: fix incorrect return type of ns2usecs() Impact: fix time output bug in 32bits system ns2usecs() returns 'long', it's incorrect. (In i386) ... -0 [000] 521.442100: _spin_lock <-tick_do_update_jiffies64 -0 [000] 521.442101: do_timer <-tick_do_update_jiffies64 -0 [000] 521.442102: update_wall_time <-do_timer -0 [000] 521.442102: update_xtime_cache <-update_wall_time .... (It always print the time less than 2200 seconds besides ...) Because 'long' is 32bits in i386. ( (1<<31) useconds is about 2200 seconds) ... -0 [001] 4154502640.134759: rcu_bh_qsctr_inc <-__do_softirq -0 [001] 4154502640.134760: _local_bh_enable <-__do_softirq -0 [001] 4154502640.134761: idle_cpu <-irq_exit ... (very large value) Because 'long' is a signed type and it is 32bits in i386. Changes in v2: return 'unsigned long long' instead of 'cycle_t' Signed-off-by: Lai Jiangshan LKML-Reference: <49D05D10.4030009@cn.fujitsu.com> Reported-by: Li Zefan Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb0ce3fc36d..0d81a4a2a4a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -596,7 +596,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern long ns2usecs(cycle_t nsec); +extern unsigned long long ns2usecs(cycle_t nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int -- cgit v1.2.3