From d20e3b03842bfeb9d21817ff19054c277cc3eac0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 10:53:15 -0500 Subject: tracing: add TRACE_FIELD_SPECIAL to record complex entries Tom Zanussi pointed out that the simple TRACE_FIELD was not enough to record trace data that required memcpy. This patch addresses this issue by adding a TRACE_FIELD_SPECIAL. The format is similar to TRACE_FIELD but looks like so: TRACE_FIELD_SPECIAL(type_item, item, cmd) What TRACE_FIELD gave was: TRACE_FIELD(type, item, assign) The TRACE_FIELD would be used in declaring a structure: struct { type item; }; And later assign it via: entry->item = assign; What TRACE_FIELD_SPECIAL gives us is: In the declaration of the structure: struct { type_item; }; And the assignment: cmd; This change log will explain the one example used in the patch: TRACE_EVENT_FORMAT(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next), TPFMT("task %s:%d ==> %s:%d", prev->comm, prev->pid, next->comm, next->pid), TRACE_STRUCT( TRACE_FIELD(pid_t, prev_pid, prev->pid) TRACE_FIELD(int, prev_prio, prev->prio) TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], next_comm, TPCMD(memcpy(TRACE_ENTRY->next_comm, next->comm, TASK_COMM_LEN))) TRACE_FIELD(pid_t, next_pid, next->pid) TRACE_FIELD(int, next_prio, next->prio) ), TPRAWFMT("prev %d:%d ==> next %s:%d:%d") ); The struct will be created as: struct { pid_t prev_pid; int prev_prio; char next_comm[TASK_COMM_LEN]; pid_t next_pid; int next_prio; }; Note the TRACE_ENTRY in the cmd part of TRACE_FIELD_SPECIAL. TRACE_ENTRY will be set by the tracer to point to the structure inside the trace buffer. The resulting assignments are: 
entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; memcpy(entry->next_comm, next->comm, TASK_COMM_LEN); entry->next_pid = next->pid; entry->next_prio = next->prio Reported-by: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_stage_1.h | 2 ++ kernel/trace/trace_events_stage_2.h | 4 ++++ kernel/trace/trace_events_stage_3.h | 14 ++++++++++++++ 3 files changed, 20 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index fd3bf9382d3..3830a731424 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -30,5 +30,7 @@ #define TRACE_FIELD(type, item, assign) \ type item; +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + type_item; #include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 3eaaef5f19e..dc79fe3a2ec 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -39,6 +39,10 @@ #define TRACE_FIELD(type, item, assign) \ field->item, +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + field->item, + #undef TPRAWFMT #define TPRAWFMT(args...) args diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 7a161c49deb..2ab65e95822 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -147,6 +147,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; + +#undef TPCMD +#define TPCMD(cmd...) 
cmd + +#undef TRACE_ENTRY +#define TRACE_ENTRY entry + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + cmd; + #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ _TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -- cgit v1.2.3 From 11a241a3302277db05561e01477528629d806c4e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 11:49:04 -0500 Subject: tracing: add protection around modify trace event fields The trace event objects are currently not protected against reentrancy. This patch adds a mutex around the modifications of the trace event fields. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d07f800a9c..26069fa6b3b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -14,6 +14,8 @@ #define TRACE_SYSTEM "TRACE_SYSTEM" +static DEFINE_MUTEX(event_mutex); + #define events_for_each(event) \ for (event = __start_ftrace_events; \ (unsigned long)event < (unsigned long)__stop_ftrace_events; \ @@ -104,6 +106,7 @@ static int ftrace_set_clr_event(char *buf, int set) event = NULL; } + mutex_lock(&event_mutex); events_for_each(call) { if (!call->name) @@ -124,6 +127,8 @@ static int ftrace_set_clr_event(char *buf, int set) ret = 0; } + mutex_unlock(&event_mutex); + return ret; } @@ -324,7 +329,9 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, switch (val) { case 0: case 1: + mutex_lock(&event_mutex); ftrace_event_enable_disable(call, val); + mutex_unlock(&event_mutex); break; default: -- cgit v1.2.3 From f9520750c4c9924c14325cd951efae5fae58104c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 14:04:40 -0500 Subject: tracing: make trace_seq_reset global and rename to trace_seq_init Impact: clean up The trace_seq functions may be used 
separately outside of the ftrace iterator. The trace_seq_reset is needed for these operations. This patch also renames trace_seq_reset to the more appropriate trace_seq_init. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 17 +++++------------ kernel/trace/trace.h | 8 ++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c5e39cd7310..ea055aa21cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -342,13 +342,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(tsk); } -static void -trace_seq_reset(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; -} - ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) { int len; @@ -395,7 +388,7 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s) s->buffer[len] = 0; seq_puts(m, s->buffer); - trace_seq_reset(s); + trace_seq_init(s); } /** @@ -2620,7 +2613,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, if (sret != -EBUSY) return sret; - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); @@ -2682,7 +2675,7 @@ waitagain: /* Now copy what we have to the user */ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); if (iter->seq.readpos >= iter->seq.len) - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); /* * If there was nothing to send to user, inspite of consuming trace @@ -2819,7 +2812,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, partial[i].offset = 0; partial[i].len = iter->seq.len; - trace_seq_reset(&iter->seq); + trace_seq_init(&iter->seq); } mutex_unlock(&iter->mutex); @@ -3631,7 +3624,7 @@ trace_printk_seq(struct trace_seq *s) printk(KERN_TRACE "%s", s->buffer); - trace_seq_reset(s); + trace_seq_init(s); } void ftrace_dump(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 
f6fa0b9f83a..cf6ba4181b1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -395,6 +395,14 @@ struct trace_seq { unsigned int readpos; }; +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + + #define TRACE_PIPE_ALL_CPU -1 /* -- cgit v1.2.3 From 981d081ec8b958b7d962ee40d433581a55d40fc5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 13:53:59 -0500 Subject: tracing: add format file to describe event struct fields This patch adds the "format" file to the trace point event directory. This is based off of work by Tom Zanussi, in which a file is exported to be read from user land such that a user space app may read the binary record stored in the ring buffer. # cat /debug/tracing/events/sched/sched_switch/format field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; Idea-from: Tom Zanussi Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 56 ++++++++++++++++++++++++++++++++++++- kernel/trace/trace_events_stage_2.h | 52 ++++++++++++++++++++++++++++++++++ kernel/trace/trace_events_stage_3.h | 2 ++ 4 files changed, 110 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cf6ba4181b1..e606633fb49 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -754,6 +754,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*raw_reg)(void); void (*raw_unreg)(void); + int (*show_format)(struct trace_seq *s); }; void event_trace_printk(unsigned long ip, const char *fmt, ...); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 26069fa6b3b..d57a772981c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3,6 +3,9 @@ * * Copyright (C) 2008 Red Hat Inc, Steven Rostedt * + * - 
Added format output of fields of the trace point. + * This was based off of work by Tom Zanussi . + * */ #include @@ -444,6 +447,42 @@ event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +static ssize_t +event_format_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + struct trace_seq *s; + char *buf; + int r; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + if (*ppos) + return 0; + + r = call->show_format(s); + if (!r) { + /* + * ug! The format output is bigger than a PAGE!! + */ + buf = "FORMAT TOO BIG\n"; + r = simple_read_from_buffer(ubuf, cnt, ppos, + buf, strlen(buf)); + goto out; + } + + r = simple_read_from_buffer(ubuf, cnt, ppos, + s->buffer, s->len); + out: + kfree(s); + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -490,6 +529,11 @@ static const struct file_operations ftrace_available_types_fops = { .read = event_available_types_read, }; +static const struct file_operations ftrace_event_format_fops = { + .open = tracing_open_generic, + .read = event_format_read, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -602,7 +646,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) &ftrace_available_types_fops); if (!entry) pr_warning("Could not create debugfs " - "'%s/type' available_types\n", call->name); + "'%s/available_types' entry\n", call->name); + + /* A trace may not want to export its format */ + if (!call->show_format) + return 0; + + entry = debugfs_create_file("format", 0444, call->dir, call, + &ftrace_event_format_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/format' entry\n", call->name); return 0; } diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h 
index dc79fe3a2ec..3a80ea4e92c 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -74,3 +74,55 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ } #include + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + return ret; \ +} + +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 2ab65e95822..c62a4d2a528 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -101,6 +101,7 @@ * .raw_init = ftrace_raw_init_event_, * .raw_reg = ftrace_raw_reg_event_, * .raw_unreg = ftrace_raw_unreg_event_, + * .show_format = ftrace_format_, * } * */ @@ -230,4 +231,5 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .raw_reg = ftrace_raw_reg_event_##call, \ .raw_unreg = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ } -- cgit v1.2.3 From 
91729ef96661bfa7dc53923746cd90b62d5495cc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 15:03:01 -0500 Subject: tracing: add ftrace headers to event format files This patch adds the ftrace header to the event format files: # cat /debug/tracing/events/sched/sched_switch/format field:unsigned char type; offset:0; size:1; field:unsigned char flags; offset:1; size:1; field:unsigned char preempt_count; offset:2; size:1; field:int pid; offset:4; size:4; field:int tgid; offset:8; size:4; field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; A blank line is used as a delimiter between the ftrace header and the trace point fields. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d57a772981c..cdcc3aed76f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -13,7 +13,7 @@ #include #include -#include "trace.h" +#include "trace_output.h" #define TRACE_SYSTEM "TRACE_SYSTEM" @@ -447,6 +447,28 @@ event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +#undef FIELD +#define FIELD(type, name) \ + #type, #name, offsetof(typeof(field), name), sizeof(field.name) + +static int trace_write_header(struct trace_seq *s) +{ + struct trace_entry field; + + /* struct trace_entry */ + return trace_seq_printf(s, + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\n", + FIELD(unsigned char, type), + FIELD(unsigned char, flags), + 
FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, tgid)); +} static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -465,6 +487,9 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, if (*ppos) return 0; + /* If this fails, so will the show_format. */ + trace_write_header(s); + r = call->show_format(s); if (!r) { /* -- cgit v1.2.3 From c5e4e19271edfdf1abd4184933d40d646da6a091 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 15:10:02 -0500 Subject: tracing: add trace name and id to event formats To be able to identify the trace in the binary format output, the id of the trace event (which is dynamically assigned) must also be listed. This patch adds the name of the trace point as well as the id assigned. Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index cdcc3aed76f..210e71ff82d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -487,7 +487,11 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, if (*ppos) return 0; - /* If this fails, so will the show_format. */ + /* If any of the first writes fail, so will the show_format. */ + + trace_seq_printf(s, "name: %s\n", call->name); + trace_seq_printf(s, "ID: %d\n", call->id); + trace_seq_printf(s, "format:\n"); trace_write_header(s); r = call->show_format(s); -- cgit v1.2.3 From 96ccd21cd13140221bda74a4fc4e53ffeba7c7d4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 2 Mar 2009 15:22:21 -0500 Subject: tracing: add print format to event trace format files This patch adds the internal print format used to print the raw events to the event trace point format file. 
# cat /debug/tracing/events/sched/sched_switch/format name: sched_switch ID: 29 format: field:unsigned char type; offset:0; size:1; field:unsigned char flags; offset:1; size:1; field:unsigned char preempt_count; offset:2; size:1; field:int pid; offset:4; size:4; field:int tgid; offset:8; size:4; field:pid_t prev_pid; offset:12; size:4; field:int prev_prio; offset:16; size:4; field special:char next_comm[TASK_COMM_LEN]; offset:20; size:16; field:pid_t next_pid; offset:36; size:4; field:int next_prio; offset:40; size:4; print fmt: "prev %d:%d ==> next %s:%d:%d" Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_stage_2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 3a80ea4e92c..b1cebba1d9b 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -122,6 +122,8 @@ ftrace_format_##call(struct trace_seq *s) \ \ tstruct; \ \ + trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + \ return ret; \ } -- cgit v1.2.3