From c47956d9ae3341d2d1998bff26620fa3338c01e4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 23 Dec 2008 23:24:11 -0500
Subject: ftrace: remove obsolete print continue functionality

Impact: cleanup, remove obsolete code

Now that the ring buffer used by ftrace allows for variable length
entries, we do not need the 'cont' feature of the buffer.  This code
makes other parts of ftrace more complex and by removing this it
simplifies the ftrace code.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cc7a4f86403..3a357382cce 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -16,7 +16,6 @@ enum trace_type {
 	TRACE_FN,
 	TRACE_CTX,
 	TRACE_WAKE,
-	TRACE_CONT,
 	TRACE_STACK,
 	TRACE_PRINT,
 	TRACE_SPECIAL,
@@ -178,7 +177,6 @@ struct trace_power {
  *  NEED_RESCED		- reschedule is requested
  *  HARDIRQ		- inside an interrupt handler
  *  SOFTIRQ		- inside a softirq handler
- *  CONT		- multiple entries hold the trace item
  */
 enum trace_flag_type {
 	TRACE_FLAG_IRQS_OFF		= 0x01,
@@ -186,7 +184,6 @@ enum trace_flag_type {
 	TRACE_FLAG_NEED_RESCHED		= 0x04,
 	TRACE_FLAG_HARDIRQ		= 0x08,
 	TRACE_FLAG_SOFTIRQ		= 0x10,
-	TRACE_FLAG_CONT			= 0x20,
 };
 
 #define TRACE_BUF_SIZE		1024
@@ -262,7 +259,6 @@ extern void __ftrace_bad_type(void);
 	do {								\
 		IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);	\
 		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
-		IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
@@ -489,9 +485,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 
 extern void *head_page(struct trace_array_cpu *data);
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
-extern void trace_seq_print_cont(struct trace_seq *s,
-				 struct trace_iterator *iter);
-
 extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
-- 
cgit v1.2.3


From f0868d1e23a8efec33beb3aa688aab7fdb1ae093 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 23 Dec 2008 23:24:12 -0500
Subject: ftrace: set up trace event hash infrastructure

Impact: simplify/generalize/refactor trace.c

The trace.c file is becoming more difficult to maintain due to the
growing number of events. There are several formats in which an event
may be printed. This patch sets up the infrastructure of an event hash to
allow for events to register how they should be printed.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3a357382cce..6bd71fa1e1c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -30,7 +30,7 @@ enum trace_type {
 	TRACE_HW_BRANCHES,
 	TRACE_POWER,
 
-	__TRACE_LAST_TYPE
+	__TRACE_LAST_TYPE,
 };
 
 /*
@@ -484,12 +484,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
-extern int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
-		unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-				 size_t cnt);
 extern long ns2usecs(cycle_t nsec);
 extern int
 trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
-- 
cgit v1.2.3


From dbd0b4b33074aa6b7832a9d9a5bd985eca5c1aa2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 28 Dec 2008 20:44:51 -0800
Subject: tracing/ftrace: provide the base infrastructure for histogram tracing

Impact: extend the tracing API

The goal of this patch is to normalize and simplify the
implementation of statistical (histogram) tracing.

It implements a trace_stat file into the /debugfs/tracing directory where
one can print a one-shot output of statistics/histogram entries.

A tracer has to provide two basic iterator callbacks:

  stat_start() => the first entry
  stat_next(prev, idx) => the next one.

Note that it is adapted for arrays or hash tables or lists.... since it
provides a pointer to the previous entry and the current index of the
iterator.

These two callbacks are called to get a snapshot of the statistics at each
opening of the trace_stat file, so the values are updated between two
"cat trace_stat" invocations. The tracer is free to lock its data during
the iteration to keep the values consistent.

Since it is almost always interesting to sort statistical values to
address the problems by priority, this infrastructure provides a "sorting"
of the stat entries too if desired. A tracer just has to provide a
stat_cmp callback to compare two entries and the stat tracing
infrastructure will build a sorted list of the given entries.

A last callback, called stat_headers, can be implemented by a tracer to
output headers on its trace.

If one of these callbacks is changed at runtime, the tracer just has to
signal it to the stat tracing API by calling the init_tracer_stat() helper.

Changes in V2:

- Fix a memory leak if the user opens the trace_stat file multiple times
  without closing it. Now we always free our list before rebuilding it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6bd71fa1e1c..05fa804d1c1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -336,6 +336,21 @@ struct tracer {
 	struct tracer		*next;
 	int			print_max;
 	struct tracer_flags 	*flags;
+
+	/*
+	 * If you change one of the following on tracing runtime, recall
+	 * init_tracer_stat()
+	 */
+
+	/* Iteration over statistic entries */
+	void			*(*stat_start)(void);
+	void			*(*stat_next)(void *prev, int idx);
+	/* Compare two entries for sorting (optional) for stats */
+	int			(*stat_cmp)(void *p1, void *p2);
+	/* Print a stat entry */
+	int			(*stat_show)(struct seq_file *s, void *p);
+	/* Print the headers of your stat entries */
+	int			(*stat_headers)(struct seq_file *s);
 };
 
 struct trace_seq {
@@ -421,6 +436,8 @@ void tracing_start_sched_switch_record(void);
 int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 
+void init_tracer_stat(struct tracer *trace);
+
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_max_latency;
-- 
cgit v1.2.3


From f7d48cbde5c0710008caeaf7dbf14f4a9b064940 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 29 Dec 2008 13:02:17 +0100
Subject: tracing/ftrace: make trace_find_cmdline() generally available

Impact: build fix

On !CONFIG_CONTEXT_SWITCH_TRACER trace_find_cmdline() is not defined:

 kernel/trace/trace_output.c: In function 'trace_ctxwake_print':
 kernel/trace/trace_output.c:499: error: implicit declaration of function 'trace_find_cmdline'
 kernel/trace/trace_output.c:499: warning: assignment makes pointer from integer without a cast

Move it to the generic section in trace.h.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 05fa804d1c1..a8b624ccd4d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -469,10 +469,10 @@ struct tracer_switch_ops {
 	void				*private;
 	struct tracer_switch_ops	*next;
 };
-
-char *trace_find_cmdline(int pid);
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
 
+extern char *trace_find_cmdline(int pid);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
 #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
-- 
cgit v1.2.3


From 36994e58a48fb8f9651c7dc845a6de298aba5bfc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 29 Dec 2008 13:42:23 -0800
Subject: tracing/kmemtrace: normalize the raw tracer event to the unified
 tracing API

Impact: new tracer plugin

This patch adapts kmemtrace raw events tracing to the unified tracing API.

To enable and use this tracer, just do the following:

 echo kmemtrace > /debugfs/tracing/current_tracer
 cat /debugfs/tracing/trace

You will have the following output:

 # tracer: kmemtrace
 #
 #
 # ALLOC  TYPE  REQ   GIVEN  FLAGS           POINTER         NODE    CALLER
 # FREE   |      |     |       |              |   |            |        |
 # |

type_id 1 call_site 18446744071565527833 ptr 18446612134395152256
type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1
type_id 1 call_site 18446744071565585534 ptr 18446612134405955584
type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1
type_id 0 call_site 18446744071565636711 ptr 18446612134345164672 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1
type_id 1 call_site 18446744071565585534 ptr 18446612134405955584
type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1
type_id 0 call_site 18446744071565636711 ptr 18446612134345164912 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1
type_id 1 call_site 18446744071565585534 ptr 18446612134405955584
type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1
type_id 0 call_site 18446744071565636711 ptr 18446612134345165152 bytes_req 240 bytes_alloc 240 gfp_flags 208 node -1
type_id 0 call_site 18446744071566144042 ptr 18446612134346191680 bytes_req 1304 bytes_alloc 1312 gfp_flags 208 node -1
type_id 1 call_site 18446744071565585534 ptr 18446612134405955584
type_id 0 call_site 18446744071565585597 ptr 18446612134405955584 bytes_req 4096 bytes_alloc 4096 gfp_flags 208 node -1
type_id 1 call_site 18446744071565585534 ptr 18446612134405955584

That was to stay backward compatible with the format output produced in
linux/tracepoint.h.

This is the default output, but note that I tried something else.

If you change an option:

echo kmem_minimalistic > /debugfs/trace_options

and then cat /debugfs/trace, you will have the following output:

 # tracer: kmemtrace
 #
 #
 # ALLOC  TYPE  REQ   GIVEN  FLAGS           POINTER         NODE    CALLER
 # FREE   |      |     |       |              |   |            |        |
 # |

   -      C                            0xffff88007c088780          file_free_rcu
   +      K   4096   4096   000000d0   0xffff88007cad6000     -1   getname
   -      C                            0xffff88007cad6000          putname
   +      K   4096   4096   000000d0   0xffff88007cad6000     -1   getname
   +      K    240    240   000000d0   0xffff8800790dc780     -1   d_alloc
   -      C                            0xffff88007cad6000          putname
   +      K   4096   4096   000000d0   0xffff88007cad6000     -1   getname
   +      K    240    240   000000d0   0xffff8800790dc870     -1   d_alloc
   -      C                            0xffff88007cad6000          putname
   +      K   4096   4096   000000d0   0xffff88007cad6000     -1   getname
   +      K    240    240   000000d0   0xffff8800790dc960     -1   d_alloc
   +      K   1304   1312   000000d0   0xffff8800791d7340     -1   reiserfs_alloc_inode
   -      C                            0xffff88007cad6000          putname
   +      K   4096   4096   000000d0   0xffff88007cad6000     -1   getname
   -      C                            0xffff88007cad6000          putname
   +      K    992   1000   000000d0   0xffff880079045b58     -1   alloc_inode
   +      K    768   1024   000080d0   0xffff88007c096400     -1   alloc_pipe_info
   +      K    240    240   000000d0   0xffff8800790dca50     -1   d_alloc
   +      K    272    320   000080d0   0xffff88007c088780     -1   get_empty_filp
   +      K    272    320   000080d0   0xffff88007c088000     -1   get_empty_filp

Yeah I shall confess kmem_minimalistic should be: kmem_alternative.

Whatever, I find it more readable but this is a personal opinion of course.
We can drop it if you want.

On the ALLOC/FREE column, + means an allocation and - a free.

On the type column, you have K = kmalloc, C = cache, P = page

I would like the flags to be GFP_* strings, but it would not be easy to
avoid breaking the column alignment with strings....

About the node...it seems to always be -1. I don't know why but that shouldn't
be difficult to find.

I moved linux/tracepoint.h to trace/tracepoint.h as well. I think it would
be easier to find the tracer headers if they are all in their common
directory.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cc7a4f86403..534505bb39b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
+#include <trace/kmemtrace.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -29,6 +30,8 @@ enum trace_type {
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
 	TRACE_HW_BRANCHES,
+	TRACE_KMEM_ALLOC,
+	TRACE_KMEM_FREE,
 	TRACE_POWER,
 
 	__TRACE_LAST_TYPE
@@ -170,6 +173,24 @@ struct trace_power {
 	struct power_trace	state_data;
 };
 
+struct kmemtrace_alloc_entry {
+	struct trace_entry	ent;
+	enum kmemtrace_type_id type_id;
+	unsigned long call_site;
+	const void *ptr;
+	size_t bytes_req;
+	size_t bytes_alloc;
+	gfp_t gfp_flags;
+	int node;
+};
+
+struct kmemtrace_free_entry {
+	struct trace_entry	ent;
+	enum kmemtrace_type_id type_id;
+	unsigned long call_site;
+	const void *ptr;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -280,6 +301,10 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
  		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
+			  TRACE_KMEM_ALLOC);	\
+		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
+			  TRACE_KMEM_FREE);	\
 		__ftrace_bad_type();					\
 	} while (0)
 
-- 
cgit v1.2.3


From 034939b65ad5ff64b9709210b3469a95153c51a3 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 8 Jan 2009 10:03:56 -0800
Subject: tracing/ftrace: handle more than one stat file per tracer

Impact: new API for tracers

Make the stat tracing API reentrant. And also provide the new directory
/debugfs/tracing/trace_stat which will contain all the stat files for the
current active tracer.

Now a tracer will, if desired, want to provide a zero terminated array of
tracer_stat structures.
Each one contains the callbacks necessary for one stat file.
It has to provide at least a name for its stat file, an iterator with
stat_start/stat_next callbacks and an output callback for one stat entry.

Also adapt the branch tracer to this new API.
We create two files "all" and "annotated" inside the /debugfs/tracing/trace_stat
directory, making both stats simultaneously available instead of needing
to change an option to switch from one stat file to another.

The output of these stats hasn't changed.

Changes in v2:

_ Apply the previous memory leak fix (rebase against tip/master)

Changes in v3:

_ Merge the patch that adapted the branch tracer to this Api in this patch to
  not break the kernel build.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 94ed45e93a8..b3f9ad1b4d8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -334,6 +334,25 @@ struct tracer_flags {
 /* Makes more easy to define a tracer opt */
 #define TRACER_OPT(s, b)	.name = #s, .bit = b
 
+/*
+ * If you want to provide a stat file (one-shot statistics), fill
+ * an iterator with stat_start/stat_next and a stat_show callbacks.
+ * The others callbacks are optional.
+ */
+struct tracer_stat {
+	/* The name of your stat file */
+	const char		*name;
+	/* Iteration over statistic entries */
+	void			*(*stat_start)(void);
+	void			*(*stat_next)(void *prev, int idx);
+	/* Compare two entries for sorting (optional) for stats */
+	int			(*stat_cmp)(void *p1, void *p2);
+	/* Print a stat entry */
+	int			(*stat_show)(struct seq_file *s, void *p);
+	/* Print the headers of your stat entries */
+	int			(*stat_headers)(struct seq_file *s);
+};
+
 /*
  * A specific tracer, represented by methods that operate on a trace array:
  */
@@ -361,21 +380,7 @@ struct tracer {
 	struct tracer		*next;
 	int			print_max;
 	struct tracer_flags 	*flags;
-
-	/*
-	 * If you change one of the following on tracing runtime, recall
-	 * init_tracer_stat()
-	 */
-
-	/* Iteration over statistic entries */
-	void			*(*stat_start)(void);
-	void			*(*stat_next)(void *prev, int idx);
-	/* Compare two entries for sorting (optional) for stats */
-	int			(*stat_cmp)(void *p1, void *p2);
-	/* Print a stat entry */
-	int			(*stat_show)(struct seq_file *s, void *p);
-	/* Print the headers of your stat entries */
-	int			(*stat_headers)(struct seq_file *s);
+	struct tracer_stat	*stats;
 };
 
 struct trace_seq {
-- 
cgit v1.2.3


From 002bb86d8d42f18937aef396c3ecd65c7e02e21a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 10 Jan 2009 11:34:13 -0800
Subject: tracing/ftrace: separate events tracing and stats tracing engine

Impact: tracing's Api change

Currently, the stat tracing depends on the events tracing.
When you switch to a new tracer, the stats files of the previous tracer
will disappear. But it's more scalable to separate those two engines.
This way, we can keep the stat files of one or several tracers when we
want, without bothering about multiple tracer stat files or tracer switching.

To build/destroy its stat files, a tracer just has to call
register_stat_tracer/unregister_stat_tracer whenever it wants to.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b3f9ad1b4d8..79c872100dd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -334,24 +334,6 @@ struct tracer_flags {
 /* Makes more easy to define a tracer opt */
 #define TRACER_OPT(s, b)	.name = #s, .bit = b
 
-/*
- * If you want to provide a stat file (one-shot statistics), fill
- * an iterator with stat_start/stat_next and a stat_show callbacks.
- * The others callbacks are optional.
- */
-struct tracer_stat {
-	/* The name of your stat file */
-	const char		*name;
-	/* Iteration over statistic entries */
-	void			*(*stat_start)(void);
-	void			*(*stat_next)(void *prev, int idx);
-	/* Compare two entries for sorting (optional) for stats */
-	int			(*stat_cmp)(void *p1, void *p2);
-	/* Print a stat entry */
-	int			(*stat_show)(struct seq_file *s, void *p);
-	/* Print the headers of your stat entries */
-	int			(*stat_headers)(struct seq_file *s);
-};
 
 /*
  * A specific tracer, represented by methods that operate on a trace array:
@@ -466,8 +448,6 @@ void tracing_start_sched_switch_record(void);
 int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 
-void init_tracer_stat(struct tracer *trace);
-
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_max_latency;
-- 
cgit v1.2.3


From 5361499101306cfb776c3cfa0f69d0479bc63868 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 15 Jan 2009 19:12:40 -0500
Subject: ftrace: add stack trace to function tracer

Impact: new feature to stack trace any function

Chris Mason asked about being able to pick and choose a function
and get a stack trace from it. This feature enables his request.

 # echo io_schedule > /debug/tracing/set_ftrace_filter
 # echo function > /debug/tracing/current_tracer
 # echo func_stack_trace > /debug/tracing/trace_options

Produces the following in /debug/tracing/trace:

       kjournald-702   [001]   135.673060: io_schedule <-sync_buffer
       kjournald-702   [002]   135.673671:
 <= sync_buffer
 <= __wait_on_bit
 <= out_of_line_wait_on_bit
 <= __wait_on_buffer
 <= sync_dirty_buffer
 <= journal_commit_transaction
 <= kjournald

Note, be careful about turning this on without filtering the functions.
You may find that you have a 10 second lag between typing and seeing
what you typed. This is why the stack trace for the function tracer
does not use the same stack_trace flag as the other tracers use.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 79c872100dd..bf39a369e4b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -457,6 +457,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
 
+void __trace_stack(struct trace_array *tr,
+		   struct trace_array_cpu *data,
+		   unsigned long flags,
+		   int skip, int pc);
+
 extern cycle_t ftrace_now(int cpu);
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -467,6 +472,8 @@ void tracing_stop_function_trace(void);
 # define tracing_stop_function_trace()		do { } while (0)
 #endif
 
+extern int ftrace_function_enabled;
+
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 typedef void
 (*tracer_switch_func_t)(void *private,
-- 
cgit v1.2.3


From a225cdd263f340c864febb1992802fb5b08bc328 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 15 Jan 2009 23:06:03 -0500
Subject: ftrace: remove static from function tracer functions

Impact: clean up

After reorganizing the functions in trace.c and trace_function.c,
they no longer need to be in global context. This patch makes the
functions and one variable static.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index bf39a369e4b..54b72781e92 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -464,16 +464,6 @@ void __trace_stack(struct trace_array *tr,
 
 extern cycle_t ftrace_now(int cpu);
 
-#ifdef CONFIG_FUNCTION_TRACER
-void tracing_start_function_trace(void);
-void tracing_stop_function_trace(void);
-#else
-# define tracing_start_function_trace()		do { } while (0)
-# define tracing_stop_function_trace()		do { } while (0)
-#endif
-
-extern int ftrace_function_enabled;
-
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 typedef void
 (*tracer_switch_func_t)(void *private,
-- 
cgit v1.2.3


From b1818748b0cf9427e48acf9713295e829a0d715f Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Mon, 19 Jan 2009 10:31:01 +0100
Subject: x86, ftrace, hw-branch-tracer: dump trace on oops

Dump the branch trace on an oops (based on ftrace_dump_on_oops).

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 54b72781e92..b96037d970d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -438,7 +438,6 @@ void trace_function(struct trace_array *tr,
 
 void trace_graph_return(struct ftrace_graph_ret *trace);
 int trace_graph_entry(struct ftrace_graph_ent *trace);
-void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
-- 
cgit v1.2.3


From c71a896154119f4ca9e89d6078f5f63ad60ef199 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 23 Jan 2009 12:06:27 -0200
Subject: blktrace: add ftrace plugin

Impact: New way of using the blktrace infrastructure

This drops the requirement of userspace utilities to use the blktrace
facility.

Configuration is done thru sysfs, adding a "trace" directory to the
partition directory where blktrace can be enabled for the associated
request_queue.

The same filters present in the IOCTL interface are present as sysfs
device attributes.

The /sys/block/sdX/sdXN/trace/enable file allows tracing without any
filters.

The other files in this directory: pid, act_mask, start_lba and end_lba
can be used with the same meaning as with the IOCTL interface.

Using the sysfs interface will only setup the request_queue->blk_trace
fields, tracing will only take place when the "blk" tracer is selected
via the ftrace interface, as in the following example:

To see the trace, one can use the /d/tracing/trace file or the
/d/tracing/trace_pipe file, with semantics defined in the ftrace
documentation in Documentation/ftrace.txt.

[root@f10-1 ~]# cat /t/trace
       kjournald-305   [000]  3046.491224:   8,1    A WBS 6367 + 8 <- (8,1) 6304
       kjournald-305   [000]  3046.491227:   8,1    Q   R 6367 + 8 [kjournald]
       kjournald-305   [000]  3046.491236:   8,1    G  RB 6367 + 8 [kjournald]
       kjournald-305   [000]  3046.491239:   8,1    P  NS [kjournald]
       kjournald-305   [000]  3046.491242:   8,1    I RBS 6367 + 8 [kjournald]
       kjournald-305   [000]  3046.491251:   8,1    D  WB 6367 + 8 [kjournald]
       kjournald-305   [000]  3046.491610:   8,1    U  WS [kjournald] 1
          <idle>-0     [000]  3046.511914:   8,1    C  RS 6367 + 8 [6367]
[root@f10-1 ~]#

The default line context (prefix) format is the one described in the ftrace
documentation, with the blktrace specific bits using its existing format,
described in blkparse(8).

If one wants to have the classic blktrace formatting, this is possible by
using:

[root@f10-1 ~]# echo blk_classic > /t/trace_options
[root@f10-1 ~]# cat /t/trace
  8,1    0  3046.491224   305  A WBS 6367 + 8 <- (8,1) 6304
  8,1    0  3046.491227   305  Q   R 6367 + 8 [kjournald]
  8,1    0  3046.491236   305  G  RB 6367 + 8 [kjournald]
  8,1    0  3046.491239   305  P  NS [kjournald]
  8,1    0  3046.491242   305  I RBS 6367 + 8 [kjournald]
  8,1    0  3046.491251   305  D  WB 6367 + 8 [kjournald]
  8,1    0  3046.491610   305  U  WS [kjournald] 1
  8,1    0  3046.511914     0  C  RS 6367 + 8 [6367]
[root@f10-1 ~]#

Using the ftrace standard format allows more flexibility, such
as the ability of asking for backtraces via trace_options:

[root@f10-1 ~]# echo noblk_classic > /t/trace_options
[root@f10-1 ~]# echo stacktrace > /t/trace_options

[root@f10-1 ~]# cat /t/trace
       kjournald-305   [000]  3318.826779:   8,1    A WBS 6375 + 8 <- (8,1) 6312
       kjournald-305   [000]  3318.826782:
 <= submit_bio
 <= submit_bh
 <= sync_dirty_buffer
 <= journal_commit_transaction
 <= kjournald
 <= kthread
 <= child_rip
       kjournald-305   [000]  3318.826836:   8,1    Q   R 6375 + 8 [kjournald]
       kjournald-305   [000]  3318.826837:
 <= generic_make_request
 <= submit_bio
 <= submit_bh
 <= sync_dirty_buffer
 <= journal_commit_transaction
 <= kjournald
 <= kthread

Please read the ftrace documentation to use additional, standardized
tracing filters such as /d/tracing/trace_cpumask, etc.

See also /d/tracing/trace_mark to add comments in the trace stream,
that is equivalent to the /d/block/sdaN/msg interface.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b96037d970d..e603a291134 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -32,6 +32,7 @@ enum trace_type {
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_POWER,
+	TRACE_BLK,
 
 	__TRACE_LAST_TYPE,
 };
-- 
cgit v1.2.3


From c4a8e8be2d43cc22b371e8e9c05c253409759d94 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 2 Feb 2009 20:29:21 -0200
Subject: trace: better manage the context info for events

Impact: make trace_event more convenient for tracers

All tracers (for the moment) that use the struct trace_event want to
have the context info printed before their own output: the pid/cmdline,
cpu, and timestamp.

But some other tracers that want to implement their trace_event
callbacks will not necessarily need this information, or they may want
to format it in their own way.

This patch adds a new default-enabled trace option:
TRACE_ITER_CONTEXT_INFO. When disabled through:

echo nocontext-info > /debugfs/tracing/trace_options

The pid, cpu and timestamps headers will not be printed.

IE with the sched_switch tracer with context-info (default):

     bash-2935 [001] 100.356561: 2935:120:S ==> [001]  0:140:R <idle>
   <idle>-0    [000] 100.412804:    0:140:R   + [000] 11:115:S events/0
   <idle>-0    [000] 100.412816:    0:140:R ==> [000] 11:115:R events/0
 events/0-11   [000] 100.412829:   11:115:S ==> [000]  0:140:R <idle>

Without context-info:

 2935:120:S ==> [001]  0:140:R <idle>
    0:140:R   + [000] 11:115:S events/0
    0:140:R ==> [000] 11:115:R events/0
   11:115:S ==> [000]  0:140:R <idle>

A tracer can disable it at runtime by clearing the bit
TRACE_ITER_CONTEXT_INFO in trace_flags.

The print routines were renamed to trace_print_context and
trace_print_lat_context, so that they can be used by tracers if they
want to use them for one of the trace_event callbacks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e603a291134..f0c7a0f08ca 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -405,6 +405,10 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
+
+struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
+					  int *ent_cpu, u64 *ent_ts);
+
 void tracing_generic_entry_update(struct trace_entry *entry,
 				  unsigned long flags,
 				  int pc);
@@ -591,7 +595,8 @@ enum trace_iterator_flags {
 	TRACE_ITER_ANNOTATE		= 0x2000,
 	TRACE_ITER_USERSTACKTRACE       = 0x4000,
 	TRACE_ITER_SYM_USEROBJ          = 0x8000,
-	TRACE_ITER_PRINTK_MSGONLY	= 0x10000
+	TRACE_ITER_PRINTK_MSGONLY	= 0x10000,
+	TRACE_ITER_CONTEXT_INFO		= 0x20000 /* Print pid/cpu/time */
 };
 
 /*
-- 
cgit v1.2.3


From 7be421510b91491d5aa5a29fa1005712039b95af Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 5 Feb 2009 01:13:37 -0500
Subject: trace: Remove unused trace_array_cpu parameter

Impact: cleanup

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f0c7a0f08ca..df627a94869 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -419,14 +419,12 @@ void ftrace(struct trace_array *tr,
 			    unsigned long parent_ip,
 			    unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
-				struct trace_array_cpu *data,
 				struct task_struct *prev,
 				struct task_struct *next,
 				unsigned long flags, int pc);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
-				struct trace_array_cpu *data,
 				struct task_struct *wakee,
 				struct task_struct *cur,
 				unsigned long flags, int pc);
@@ -436,7 +434,6 @@ void trace_special(struct trace_array *tr,
 		   unsigned long arg2,
 		   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
-		    struct trace_array_cpu *data,
 		    unsigned long ip,
 		    unsigned long parent_ip,
 		    unsigned long flags, int pc);
@@ -462,7 +459,6 @@ void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
 
 void __trace_stack(struct trace_array *tr,
-		   struct trace_array_cpu *data,
 		   unsigned long flags,
 		   int skip, int pc);
 
-- 
cgit v1.2.3


From 51a763dd84253bab1d0a1e68e11a7753d1b702ca Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 5 Feb 2009 16:14:13 -0200
Subject: tracing: Introduce trace_buffer_{lock_reserve,unlock_commit}
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: new API

These new functions do what previously was being open coded, reducing
the number of details ftrace plugin writers have to worry about.

It also standardizes the handling of stacktrace, userstacktrace and
other trace options we may introduce in the future.

With this patch, for instance, the blk tracer (and some others already
in the tree) can use the "userstacktrace" /d/tracing/trace_options
facility.

$ codiff /tmp/vmlinux.before /tmp/vmlinux.after
linux-2.6-tip/kernel/trace/trace.c:
  trace_vprintk              |   -5
  trace_graph_return         |  -22
  trace_graph_entry          |  -26
  trace_function             |  -45
  __ftrace_trace_stack       |  -27
  ftrace_trace_userstack     |  -29
  tracing_sched_switch_trace |  -66
  tracing_stop               |   +1
  trace_seq_to_user          |   -1
  ftrace_trace_special       |  -63
  ftrace_special             |   +1
  tracing_sched_wakeup_trace |  -70
  tracing_reset_online_cpus  |   -1
 13 functions changed, 2 bytes added, 355 bytes removed, diff: -353

linux-2.6-tip/block/blktrace.c:
  __blk_add_trace |  -58
 1 function changed, 58 bytes removed, diff: -58

linux-2.6-tip/kernel/trace/trace.c:
  trace_buffer_lock_reserve  |  +88
  trace_buffer_unlock_commit |  +86
 2 functions changed, 174 bytes added, diff: +174

/tmp/vmlinux.after:
 16 functions changed, 176 bytes added, 413 bytes removed, diff: -237

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Frédéric Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index df627a94869..e03f157c772 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -403,6 +403,17 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct ring_buffer_event;
+
+struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
+						    unsigned char type,
+						    unsigned long len,
+						    unsigned long flags,
+						    int pc);
+void trace_buffer_unlock_commit(struct trace_array *tr,
+				struct ring_buffer_event *event,
+				unsigned long flags, int pc);
+
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 
-- 
cgit v1.2.3


From b6f11df26fdc28324cf9c9e3b77f2dc985c1bb13 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 5 Feb 2009 18:02:00 -0200
Subject: trace: Call tracing_reset_online_cpus before tracer->init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Impact: cleanup

This makes it easier for ftrace plugin writers, as the reset was open coded in
the existing plugins.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Frédéric Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e03f157c772..f2742fb1575 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -395,6 +395,7 @@ struct trace_iterator {
 	cpumask_var_t		started;
 };
 
+int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
-- 
cgit v1.2.3


From 1830b52d0de8c60c4f5dfbac134aa8f69d815801 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Sat, 7 Feb 2009 19:38:43 -0500
Subject: trace: remove deprecated entry->cpu

Impact: fix to prevent developers from using entry->cpu

With the new ring buffer infrastructure, the cpu for the entry is
implicit with which CPU buffer it is on.

The original code used to record the current cpu into the generic
entry header, which can be retrieved by entry->cpu. When the
ring buffer was introduced, the users were converted to use the
cpu number of the cpu ring buffer that was in use (this was passed
to the tracers by the iterator: iter->cpu).

Unfortunately, the cpu item in the entry structure was never removed.
This allowed for developers to use it instead of the proper iter->cpu,
unknowingly, using an uninitialized variable. This was not the fault
of the developers, since it would seem like the logical place to
retrieve the cpu identifier.

This patch removes the cpu item from the entry structure and fixes
all the users that should have been using iter->cpu.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f0c7a0f08ca..5efc4c707f7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -45,7 +45,6 @@ enum trace_type {
  */
 struct trace_entry {
 	unsigned char		type;
-	unsigned char		cpu;
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
-- 
cgit v1.2.3


From 57794a9d48b63e34acbe63282628c9f029603308 Mon Sep 17 00:00:00 2001
From: Wenji Huang <wenji.huang@oracle.com>
Date: Fri, 6 Feb 2009 17:33:27 +0800
Subject: trace: trivial fixes in comment typos.

Impact: clean up

Fixed several typos in the comments.

Signed-off-by: Wenji Huang <wenji.huang@oracle.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5efc4c707f7..f92aba52a89 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -616,12 +616,12 @@ extern struct tracer nop_trace;
  * preempt_enable (after a disable), a schedule might take place
  * causing an infinite recursion.
  *
- * To prevent this, we read the need_recshed flag before
+ * To prevent this, we read the need_resched flag before
  * disabling preemption. When we want to enable preemption we
  * check the flag, if it is set, then we call preempt_enable_no_resched.
  * Otherwise, we call preempt_enable.
  *
- * The rational for doing the above is that if need resched is set
+ * The rational for doing the above is that if need_resched is set
  * and we have yet to reschedule, we are either in an atomic location
  * (where we do not need to check for scheduling) or we are inside
  * the scheduler and do not want to resched.
@@ -642,7 +642,7 @@ static inline int ftrace_preempt_disable(void)
  *
  * This is a scheduler safe way to enable preemption and not miss
  * any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we were either inside an atomic or
+ * If resched is set, then we are either inside an atomic or
  * are inside the scheduler (we would have already scheduled
  * otherwise). In this case, we do not want to call normal
  * preempt_enable, but preempt_enable_no_resched instead.
-- 
cgit v1.2.3


From 7447dce96f2233d250bc39a4a10a42f7c3dd46fc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 7 Feb 2009 21:33:57 +0100
Subject: tracing/function-graph-tracer: provide a selftest for the function
 graph tracer

This makes it easier to do a basic regression test for this tracer.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b9838f4a692..a011ec06222 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -500,6 +500,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 extern int trace_selftest_startup_function(struct tracer *trace,
 					   struct trace_array *tr);
+extern int trace_selftest_startup_function_graph(struct tracer *trace,
+						 struct trace_array *tr);
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
 					  struct trace_array *tr);
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
-- 
cgit v1.2.3


From 1292211058aaf872eeb2a0e2677d237916b4501f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 7 Feb 2009 22:16:12 +0100
Subject: tracing/power: move the power trace headers to a dedicated file

Impact: cleanup

Move the power tracer headers to trace/power.h to keep the ftrace.h and power bits
easier to maintain as separate topics.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a011ec06222..1ecfb9d2b36 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -10,6 +10,7 @@
 #include <linux/ftrace.h>
 #include <trace/boot.h>
 #include <trace/kmemtrace.h>
+#include <trace/power.h>
 
 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
-- 
cgit v1.2.3


From b91facc367366b3f71375f337eb5997ec9ab4e69 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 6 Feb 2009 18:30:44 +0100
Subject: tracing/function-graph-tracer: handle the leaf functions from
 trace_pipe

When one cats the trace file, the leaf functions are printed without brackets:

 function();

whereas in the trace_pipe file we'll see the following:

 function() {
 }

This is because the ring_buffer handling is not the same between those two files.
On the trace file, when an entry is printed, the iterator is advanced and then we can
check the next entry.

There is no iterator with trace_pipe: the current entry to print has been peeked
and not consumed. So checking the next entry will still return the current one as long as
we don't consume it.

This patch introduces a new value for the output callbacks to ask the tracing
core to not consume the current entry after printing it.

We need it because we will have to consume the current entry ourselves to check
the next one.

Now trace_pipe is able to handle leaf functions properly.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1ecfb9d2b36..7b0518adf6d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -63,13 +63,13 @@ struct ftrace_entry {
 
 /* Function call entry */
 struct ftrace_graph_ent_entry {
-	struct trace_entry			ent;
+	struct trace_entry		ent;
 	struct ftrace_graph_ent		graph_ent;
 };
 
 /* Function return entry */
 struct ftrace_graph_ret_entry {
-	struct trace_entry			ent;
+	struct trace_entry		ent;
 	struct ftrace_graph_ret		ret;
 };
 extern struct tracer boot_tracer;
@@ -309,7 +309,8 @@ extern void __ftrace_bad_type(void);
 enum print_line_t {
 	TRACE_TYPE_PARTIAL_LINE	= 0,	/* Retry after flushing the seq */
 	TRACE_TYPE_HANDLED	= 1,
-	TRACE_TYPE_UNHANDLED	= 2	/* Relay to other output functions */
+	TRACE_TYPE_UNHANDLED	= 2,	/* Relay to other output functions */
+	TRACE_TYPE_NO_CONSUME	= 3	/* Handled but ask to not consume */
 };
 
 
-- 
cgit v1.2.3


From 3c56819b14b00dd449bd776303e61f8532fad09f Mon Sep 17 00:00:00 2001
From: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Date: Mon, 9 Feb 2009 08:15:56 +0200
Subject: tracing: splice support for tracing_pipe

Added and implemented tracing_pipe_fops->splice_read(). This allows
userspace programs to get tracing data more efficiently.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7b0518adf6d..dbff0207b21 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -353,6 +353,12 @@ struct tracer {
 	ssize_t			(*read)(struct trace_iterator *iter,
 					struct file *filp, char __user *ubuf,
 					size_t cnt, loff_t *ppos);
+	ssize_t			(*splice_read)(struct trace_iterator *iter,
+					       struct file *filp,
+					       loff_t *ppos,
+					       struct pipe_inode_info *pipe,
+					       size_t len,
+					       unsigned int flags);
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 	int			(*selftest)(struct tracer *trace,
 					    struct trace_array *tr);
-- 
cgit v1.2.3


From 6eaaa5d57e76c454479833fc8594cd7c3b75c789 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Feb 2009 02:25:00 +0100
Subject: tracing/core: use appropriate waiting on trace_pipe

Impact: api and pipe waiting change

Currently, the waiting used in tracing_read_pipe() is done through a
100 msecs schedule_timeout() loop which periodically checks if there
are traces on the buffer.

This can cause small latencies for programs which are reading the incoming
events.

This patch makes the reader wait on the trace_wait waitqueue, except
for a few tracers such as the sched and function tracers, which might
already hold the runqueue lock while waking up the reader.

This is performed through a new callback wait_pipe() on struct tracer.
If none is implemented on a specific tracer, the default waiting for
trace_wait queue is attached.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dbff0207b21..eed732c151f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -337,18 +337,34 @@ struct tracer_flags {
 #define TRACER_OPT(s, b)	.name = #s, .bit = b
 
 
-/*
- * A specific tracer, represented by methods that operate on a trace array:
+/**
+ * struct tracer - a specific tracer and its callbacks to interact with debugfs
+ * @name: the name chosen to select it on the available_tracers file
+ * @init: called when one switches to this tracer (echo name > current_tracer)
+ * @reset: called when one switches to another tracer
+ * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
+ * @stop: called when tracing is paused (echo 0 > tracing_enabled)
+ * @open: called when the trace file is opened
+ * @pipe_open: called when the trace_pipe file is opened
+ * @wait_pipe: override how the user waits for traces on trace_pipe
+ * @close: called when the trace file is released
+ * @read: override the default read callback on trace_pipe
+ * @splice_read: override the default splice_read callback on trace_pipe
+ * @selftest: selftest to run on boot (see trace_selftest.c)
+ * @print_headers: override the first lines that describe your columns
+ * @print_line: callback that prints a trace
+ * @set_flag: signals one of your private flags changed (trace_options file)
+ * @flags: your private flags
  */
 struct tracer {
 	const char		*name;
-	/* Your tracer should raise a warning if init fails */
 	int			(*init)(struct trace_array *tr);
 	void			(*reset)(struct trace_array *tr);
 	void			(*start)(struct trace_array *tr);
 	void			(*stop)(struct trace_array *tr);
 	void			(*open)(struct trace_iterator *iter);
 	void			(*pipe_open)(struct trace_iterator *iter);
+	void			(*wait_pipe)(struct trace_iterator *iter);
 	void			(*close)(struct trace_iterator *iter);
 	ssize_t			(*read)(struct trace_iterator *iter,
 					struct file *filp, char __user *ubuf,
@@ -432,6 +448,9 @@ void tracing_generic_entry_update(struct trace_entry *entry,
 				  unsigned long flags,
 				  int pc);
 
+void default_wait_pipe(struct trace_iterator *iter);
+void poll_wait_pipe(struct trace_iterator *iter);
+
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
-- 
cgit v1.2.3


From b04cc6b1f6398b0e0b60d37e27ce51b4899672ec Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 25 Feb 2009 03:22:28 +0100
Subject: tracing/core: introduce per cpu tracing files

Impact: split up tracing output per cpu

Currently, on the tracing debugfs directory, three files are
available to the user to let them extract the trace output:

- trace is an iterator through the ring-buffer. It's a reader
  but not a consumer. It doesn't block when no more traces are
  available.

- latency_trace is pretty similar to the former, except that it adds more
  information such as preempt count, irq flag, ...

- trace_pipe is a reader and a consumer, it will also block
  waiting for traces if necessary (heh, yes it's a pipe).

The traces coming from different cpus are currently mixed up
inside these files. Sometimes it messes up the information,
sometimes it's useful, depending on what the tracer
captures.

The tracing_cpumask file is useful to filter the output and
select only the traces captured on a custom-defined set of cpus.
But it is still not powerful enough to extract one trace buffer
per cpu at the same time.

So this patch creates a new directory: /debug/tracing/per_cpu/.

Inside this directory, you will now find one trace_pipe file and
one trace file per cpu.

Which means if you have two cpus, you will have:

 trace0
 trace1
 trace_pipe0
 trace_pipe1

And of course, reading these files will have the same effect
as with the usual tracing files, except that you will only see
the traces from the given cpu.

The original all-in-one cpu trace files are still available in
their original place.

Until now, only one consumer was allowed on trace_pipe to avoid
racy consuming on the ring-buffer. Now the approach changed a
bit, you can have only one consumer per cpu.

Which means you are allowed to read concurrently trace_pipe0 and
trace_pipe1. But you can't have two readers on trace_pipe0 or
trace_pipe1.

Following the same logic, if there is one reader on the common
trace_pipe, you can not have at the same time another reader on
trace_pipe0 or on trace_pipe1, because trace_pipe is, in essence,
already a consumer of all cpu buffers.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index eed732c151f..508235a39da 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -395,6 +395,8 @@ struct trace_seq {
 	unsigned int		readpos;
 };
 
+#define TRACE_PIPE_ALL_CPU	-1
+
 /*
  * Trace iterator - used by printout routines who present trace
  * results to users and which routines might sleep, etc:
@@ -404,6 +406,7 @@ struct trace_iterator {
 	struct tracer		*trace;
 	void			*private;
 	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
+	int			cpu_file;
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
-- 
cgit v1.2.3


From d7350c3f45694104e820041969c8185c5f99e57c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 25 Feb 2009 06:13:16 +0100
Subject: tracing/core: make the read callbacks reentrants

Now that several per-cpu files can be read or spliced at the
same time, we want the read/splice callbacks for tracing files to be
reentrant.

Until now, a single global mutex (trace_types_lock) serialized
the access to tracing_read_pipe(), tracing_splice_read_pipe(),
and the seq helpers.

Ie: it means that if a user tries to read trace_pipe0 and
trace_pipe1 at the same time, the access to the function
tracing_read_pipe() is contended and one reader must wait for
the other to finish its read call.

The trace_types_lock mutex is mostly here to serialize the access
to the global current tracer (current_trace), which can be
changed concurrently. Although the iter struct keeps a private
pointer to this tracer, its callbacks can be changed by another
function.

The method used here is to no longer keep a private reference to
the tracer inside the iterator but to make a copy of it inside
the iterator. Then it checks on subsequent read calls if the
tracer has changed. This is not costly because the current
tracer is not expected to be changed often, so we use a branch
prediction for that.

Moreover, we add a private mutex to the iterator (there is one
iterator per file descriptor) to serialize the accesses in case
of multiple consumers per file descriptor (which would be a
silly idea from the user). Note that this is not to protect the
ring buffer, since the ring buffer already serializes the
readers' accesses. This is to prevent weirdness in the traces in
case of concurrent consumers. But these mutexes can be dropped
anyway; that would not result in any crash. Just tell me what
you think about it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 508235a39da..632191770aa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -405,8 +405,9 @@ struct trace_iterator {
 	struct trace_array	*tr;
 	struct tracer		*trace;
 	void			*private;
-	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 	int			cpu_file;
+	struct mutex		mutex;
+	struct ring_buffer_iter	*buffer_iter[NR_CPUS];
 
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
-- 
cgit v1.2.3


From ef5580d0fffce6e0a01043bac0625128b5d409a7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 27 Feb 2009 19:38:04 -0500
Subject: tracing: add interface to write into current tracer buffer

Right now all tracers must manage their own trace buffers. This was
to enforce tracers to be independent in case we finally decide to
allow each tracer to have their own trace buffer.

But now we are adding event tracing that writes to the current tracer's
buffer. This adds an interface to allow events to write to the current
tracer buffer without having to manage its own, since event tracing
has no "tracer" and is just a way to hook into any other tracer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 632191770aa..adf161f6dd1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -442,6 +442,12 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc);
 
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+				  unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
+
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
 
-- 
cgit v1.2.3


From c32e827b25054cb17b79cf97fb5e63ae4ce2223c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 27 Feb 2009 19:12:30 -0500
Subject: tracing: add raw trace point recording infrastructure

Impact: lower overhead tracing

The current event tracer can automatically pick up trace points
that are registered with the TRACE_FORMAT macro. But it required
a printf format string and parsing. Although, this adds the ability
to get guaranteed information like task names and such, it took
a hit in overhead processing. This processing can add about 500-1000
nanoseconds overhead, but in some cases that too is considered
too much and we want to shave off as much from this overhead as
possible.

Tom Zanussi recently posted tracing patches to lkml that are based
on a nice idea about capturing the data via C structs using
STRUCT_ENTER, STRUCT_EXIT type of macros.

I liked that method very much, but did not like the implementation
that required a developer to add data/code in several disjoint
locations.

This patch extends the event_tracer macros to do a similar "raw C"
approach that Tom Zanussi did. But instead of having the developers
needing to tweak a bunch of code all over the place, they can do it
all in one macro - preferably placed near the code that it is
tracing. That makes it much more likely that tracepoints will be
maintained on an ongoing basis by the code they modify.

The new macro TRACE_EVENT_FORMAT is created for this approach. (Note,
a developer may still utilize the more low level DECLARE_TRACE macros
if they don't care about getting their traces automatically in the event
tracer.)

They can also use the existing TRACE_FORMAT if they don't need to code
the tracepoint in C, but just want to use the convenience of printf.

So if the developer wants to "hardwire" a tracepoint in the fastest
possible way, and wants to acquire their data via a user space utility
in a raw binary format, or wants to see it in the trace output but not
sacrifice any performance, then they can implement the faster but
more complex TRACE_EVENT_FORMAT macro.

Here's what usage looks like:

  TRACE_EVENT_FORMAT(name,
	TPPROTO(proto),
	TPARGS(args),
	TPFMT(fmt, fmt_args),
	TRACE_STRUCT(
		TRACE_FIELD(type1, item1, assign1)
		TRACE_FIELD(type2, item2, assign2)
			[...]
	),
	TPRAWFMT(raw_fmt)
	);

Note name, proto, args, and fmt, are all identical to what TRACE_FORMAT
uses.

 name: is the unique identifier of the trace point
 proto: The proto type that the trace point uses
 args: the args in the proto type
 fmt: printf format to use with the event printf tracer
 fmt_args: the printf arguments to match fmt

 TRACE_STRUCT starts the ability to create a structure.
 Each item in the structure is defined with a TRACE_FIELD

  TRACE_FIELD(type, item, assign)

 type: the C type of item.
 item: the name of the item in the structure
 assign: what to assign the item in the trace point callback

 raw_fmt is a way to pretty print the struct. It must match
  the order in which the items are added in TRACE_STRUCT

 An example of this would be:

 TRACE_EVENT_FORMAT(sched_wakeup,
	TPPROTO(struct rq *rq, struct task_struct *p, int success),
	TPARGS(rq, p, success),
	TPFMT("task %s:%d %s",
	      p->comm, p->pid, success?"succeeded":"failed"),
	TRACE_STRUCT(
		TRACE_FIELD(pid_t, pid, p->pid)
		TRACE_FIELD(int, success, success)
	),
	TPRAWFMT("task %d success=%d")
	);

 This creates us a unique struct of:

 struct {
	pid_t		pid;
	int		success;
 };

 And the way the call back would assign these values would be:

	entry->pid = p->pid;
	entry->success = success;

The nice part about this is that the creation of the assignment is done
via macro magic in the event tracer.  Once the TRACE_EVENT_FORMAT is
created, the developer will then have a faster method to record
into the ring buffer. They do not need to worry about the tracer itself.

The developer would only need to touch the files in include/trace/*.h

Again, I would like to give special thanks to Tom Zanussi for this
nice idea.

Idea-from: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index adf161f6dd1..aa1ab0cb80a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -726,4 +726,23 @@ static inline void trace_branch_disable(void)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
+struct ftrace_event_call {
+	char		*name;
+	char		*system;
+	struct dentry	*dir;
+	int		enabled;
+	int		(*regfunc)(void);
+	void		(*unregfunc)(void);
+	int		id;
+	struct dentry	*raw_dir;
+	int		raw_enabled;
+	int		(*raw_init)(void);
+	int		(*raw_reg)(void);
+	void		(*raw_unreg)(void);
+};
+
+void event_trace_printk(unsigned long ip, const char *fmt, ...);
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
+
 #endif /* _LINUX_KERNEL_TRACE_H */
-- 
cgit v1.2.3


From fd99498989f3b3feeab89dcadf537138ba136d24 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Sat, 28 Feb 2009 02:41:25 -0500
Subject: tracing: add raw fast tracing interface for trace events

This patch adds the interface to enable the C style trace points.
In the directory /debugfs/tracing/events/subsystem/event
We now have three files:

 enable : values 0 or 1 to enable or disable the trace event.

 available_types: values 'raw' and 'printf' which indicate the tracing
       types available for the trace point. If a developer does not
       use the TRACE_EVENT_FORMAT macro and just uses the TRACE_FORMAT
       macro, then only 'printf' will be available. This file is
       read only.

 type: values 'raw' or 'printf'. This indicates which type of tracing
       is active for that trace point. 'printf' is the default and
       if 'raw' is not available, this file is read only.

 # echo raw > /debug/tracing/events/sched/sched_wakeup/type
 # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable

 Will enable the C style tracing for the sched_wakeup trace point.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index aa1ab0cb80a..f6fa0b9f83a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -726,6 +726,12 @@ static inline void trace_branch_disable(void)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
+/* trace event type bit fields, not numeric */
+enum {
+	TRACE_EVENT_TYPE_PRINTF		= 1,
+	TRACE_EVENT_TYPE_RAW		= 2,
+};
+
 struct ftrace_event_call {
 	char		*name;
 	char		*system;
@@ -736,6 +742,7 @@ struct ftrace_event_call {
 	int		id;
 	struct dentry	*raw_dir;
 	int		raw_enabled;
+	int		type;
 	int		(*raw_init)(void);
 	int		(*raw_reg)(void);
 	void		(*raw_unreg)(void);
-- 
cgit v1.2.3


From f9520750c4c9924c14325cd951efae5fae58104c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 2 Mar 2009 14:04:40 -0500
Subject: tracing: make trace_seq_reset global and rename to trace_seq_init

Impact: clean up

The trace_seq functions may be used separately outside of the ftrace
iterator. The trace_seq_reset is needed for these operations.

This patch also renames trace_seq_reset to the more appropriate
trace_seq_init.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f6fa0b9f83a..cf6ba4181b1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -395,6 +395,14 @@ struct trace_seq {
 	unsigned int		readpos;
 };
 
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+	s->len = 0;
+	s->readpos = 0;
+}
+
+
 #define TRACE_PIPE_ALL_CPU	-1
 
 /*
-- 
cgit v1.2.3


From 981d081ec8b958b7d962ee40d433581a55d40fc5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 2 Mar 2009 13:53:59 -0500
Subject: tracing: add format file to describe event struct fields

This patch adds the "format" file to the trace point event directory.
This is based off of work by Tom Zanussi, in which a file is exported
to be read from user land such that a user space app may read the
binary record stored in the ring buffer.

 # cat /debug/tracing/events/sched/sched_switch/format
        field:pid_t prev_pid;   offset:12;      size:4;
        field:int prev_prio;    offset:16;      size:4;
        field special:char next_comm[TASK_COMM_LEN];    offset:20;      size:16;
        field:pid_t next_pid;   offset:36;      size:4;
        field:int next_prio;    offset:40;      size:4;

Idea-from: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cf6ba4181b1..e606633fb49 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -754,6 +754,7 @@ struct ftrace_event_call {
 	int		(*raw_init)(void);
 	int		(*raw_reg)(void);
 	void		(*raw_unreg)(void);
+	int		(*show_format)(struct trace_seq *s);
 };
 
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
-- 
cgit v1.2.3


From 2cadf9135eb3b6d84b6427314be827ddd443c308 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 1 Dec 2008 22:20:19 -0500
Subject: tracing: add binary buffer files for use with splice

Impact: new feature

This patch creates a directory of files that correspond to the
per CPU ring buffers. These are binary files and are made to
be used with splice. This is the fastest way to extract data from
the ftrace ring buffers.

Thanks to Jiaying Zhang for pushing me to get this code fixed,
 and to Eduard - Gabriel Munteanu for his splice code that helped
 me debug my code.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e606633fb49..561bb5c5d98 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -217,6 +217,7 @@ enum trace_flag_type {
  */
 struct trace_array_cpu {
 	atomic_t		disabled;
+	void			*buffer_page;	/* ring buffer spare */
 
 	/* these fields get copied into max-trace: */
 	unsigned long		trace_idx;
-- 
cgit v1.2.3


From c032ef64d680717e4e8ce3da65da6419a35f8a2c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 4 Mar 2009 20:34:24 -0500
Subject: tracing: add latency output format option

With the removal of the latency_trace file, we lost the ability
to see some of the finer details in a trace. Like the state of
interrupts enabled, the preempt count, need resched, and if we
are in an interrupt handler, softirq handler or not.

This patch simply creates an option to bring back the old format.
This also removes the warning about an unused variable that held
the latency_trace file operations.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 561bb5c5d98..12cd119cca3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -651,7 +651,8 @@ enum trace_iterator_flags {
 	TRACE_ITER_USERSTACKTRACE       = 0x4000,
 	TRACE_ITER_SYM_USEROBJ          = 0x8000,
 	TRACE_ITER_PRINTK_MSGONLY	= 0x10000,
-	TRACE_ITER_CONTEXT_INFO		= 0x20000 /* Print pid/cpu/time */
+	TRACE_ITER_CONTEXT_INFO		= 0x20000, /* Print pid/cpu/time */
+	TRACE_ITER_LATENCY_FMT		= 0x40000,
 };
 
 /*
-- 
cgit v1.2.3


From 5e1607a00bd082972629d3d68c95c8bcf902b55a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 5 Mar 2009 10:24:48 +0100
Subject: tracing: rename ftrace_printk() => trace_printk()

Impact: cleanup

Use a more generic name - this also allows the prototype to move
to kernel.h and be generally available to kernel developers who
want to do some quick tracing.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 12cd119cca3..8beff03fda6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -115,7 +115,7 @@ struct userstack_entry {
 };
 
 /*
- * ftrace_printk entry:
+ * trace_printk entry:
  */
 struct print_entry {
 	struct trace_entry	ent;
-- 
cgit v1.2.3


From 1427cdf0592368bdec57276edaf714040ee8744f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 6 Mar 2009 17:21:47 +0100
Subject: tracing: infrastructure for supporting binary record

Impact: save on memory for tracing

Current tracers typically use a struct (like struct ftrace_entry,
struct ctx_switch_entry, struct special_entry, etc.) to record a binary
event. These structs can only record their own kind of events.
A new kind of tracer needs a new struct and a lot of code to handle it.

So we need a generic binary record for events. This infrastructure
is for this purpose.

[fweisbec@gmail.com: rebase against latest -tip, make it safe while sched
tracing as reported by Steven Rostedt]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1236356510-8381-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8beff03fda6..0f5077f8f95 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,6 +20,7 @@ enum trace_type {
 	TRACE_WAKE,
 	TRACE_STACK,
 	TRACE_PRINT,
+	TRACE_BPRINTK,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -124,6 +125,16 @@ struct print_entry {
 	char			buf[];
 };
 
+struct bprintk_entry {
+	struct trace_entry ent;
+	unsigned long ip;
+	const char *fmt;
+	u32 buf[];
+};
+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_bprintk_enable;
+#endif
+
 #define TRACE_OLD_SIZE		88
 
 struct trace_field_cont {
@@ -285,6 +296,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
+		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
-- 
cgit v1.2.3


From 769b0441f438c4bb4872cb8560eb6fe51bcc09ee Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 6 Mar 2009 17:21:49 +0100
Subject: tracing/core: drop the old trace_printk() implementation in favour of
 trace_bprintk()

Impact: faster and lighter tracing

Now that we have trace_bprintk(), which is faster and consumes less
memory than trace_printk() and has the same purpose, we can now drop
the old implementation in favour of the binary one from trace_bprintk(),
which means we move all the implementation of trace_bprintk() to
trace_printk(), so the API doesn't change except that we must now use
trace_seq_bprintk() to print the TRACE_PRINT entries.

Some changes result of this:

- Previously, trace_bprintk depended on a single tracer and couldn't
  work without it. This tracer has been dropped and the whole implementation
  of trace_printk() (like the module formats management) is now integrated
  in the tracing core (comes with CONFIG_TRACING), though we keep the file
  trace_printk.c (previously trace_bprintk.c) where we can find the module
  management. Thus we don't overflow trace.c

- changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries.

- change a bit trace_printk/trace_vprintk macros to support non-builtin formats
  constants, and fix 'const' qualifiers warnings. But this is all transparent for
  developers.

- etc...

V2:

- Rebase against last changes
- Fix misspelling in the changelog

V3:

- Rebase against last changes (moving trace_printk() to kernel.h)

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0f5077f8f95..6140922392c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,7 +20,6 @@ enum trace_type {
 	TRACE_WAKE,
 	TRACE_STACK,
 	TRACE_PRINT,
-	TRACE_BPRINTK,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -120,16 +119,10 @@ struct userstack_entry {
  */
 struct print_entry {
 	struct trace_entry	ent;
-	unsigned long		ip;
+	unsigned long 		ip;
 	int			depth;
-	char			buf[];
-};
-
-struct bprintk_entry {
-	struct trace_entry ent;
-	unsigned long ip;
-	const char *fmt;
-	u32 buf[];
+	const char		*fmt;
+	u32 			buf[];
 };
 #ifdef CONFIG_TRACE_BPRINTK
 extern int trace_bprintk_enable;
@@ -296,7 +289,6 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
-		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
-- 
cgit v1.2.3


From 9de36825b321fe9fe9cf73260554251af579f4ca Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 6 Mar 2009 17:52:03 +0100
Subject: tracing: trace_bprintk() cleanups

Impact: cleanup

Remove a few leftovers and clean up the code a bit.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1236356510-8381-5-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6140922392c..2bfb7d11fc1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -119,14 +119,11 @@ struct userstack_entry {
  */
 struct print_entry {
 	struct trace_entry	ent;
-	unsigned long 		ip;
+	unsigned long		ip;
 	int			depth;
 	const char		*fmt;
-	u32 			buf[];
+	u32			buf[];
 };
-#ifdef CONFIG_TRACE_BPRINTK
-extern int trace_bprintk_enable;
-#endif
 
 #define TRACE_OLD_SIZE		88
 
@@ -199,7 +196,7 @@ struct kmemtrace_free_entry {
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
  *  IRQS_OFF		- interrupts were disabled
- *  IRQS_NOSUPPORT 	- arch does not support irqs_disabled_flags
+ *  IRQS_NOSUPPORT	- arch does not support irqs_disabled_flags
  *  NEED_RESCED		- reschedule is requested
  *  HARDIRQ		- inside an interrupt handler
  *  SOFTIRQ		- inside a softirq handler
@@ -302,7 +299,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
 			  TRACE_GRAPH_RET);		\
 		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
- 		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+		IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
 		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
@@ -325,8 +322,8 @@ enum print_line_t {
  * flags value in struct tracer_flags.
  */
 struct tracer_opt {
-	const char 	*name; /* Will appear on the trace_options file */
-	u32 		bit; /* Mask assigned in val field in tracer_flags */
+	const char	*name; /* Will appear on the trace_options file */
+	u32		bit; /* Mask assigned in val field in tracer_flags */
 };
 
 /*
@@ -335,7 +332,7 @@ struct tracer_opt {
  */
 struct tracer_flags {
 	u32			val;
-	struct tracer_opt 	*opts;
+	struct tracer_opt	*opts;
 };
 
 /* Makes more easy to define a tracer opt */
@@ -390,7 +387,7 @@ struct tracer {
 	int			(*set_flag)(u32 old_flags, u32 bit, int set);
 	struct tracer		*next;
 	int			print_max;
-	struct tracer_flags 	*flags;
+	struct tracer_flags	*flags;
 	struct tracer_stat	*stats;
 };
 
-- 
cgit v1.2.3


From da4d03020c2af32f73e8bfbab0a66620d85bb9bb Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 9 Mar 2009 17:14:30 -0400
Subject: tracing: new format for specialized trace points

Impact: clean up and enhancement

The TRACE_EVENT_FORMAT macro looks quite ugly and is limited in its
ability to save data as well as to print the record out. Working with
Ingo Molnar, we came up with a new format that is much more pleasing to
the eye of C developers. This new macro is more C style than the old
macro, and is more obvious to what it does.

Here's the example. The only updated macro in this patch is the
sched_switch trace point.

The old method looked like this:

 TRACE_EVENT_FORMAT(sched_switch,
        TP_PROTO(struct rq *rq, struct task_struct *prev,
                struct task_struct *next),
        TP_ARGS(rq, prev, next),
        TP_FMT("task %s:%d ==> %s:%d",
              prev->comm, prev->pid, next->comm, next->pid),
        TRACE_STRUCT(
                TRACE_FIELD(pid_t, prev_pid, prev->pid)
                TRACE_FIELD(int, prev_prio, prev->prio)
                TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN],
                                    next_comm,
                                    TP_CMD(memcpy(TRACE_ENTRY->next_comm,
                                                 next->comm,
                                                 TASK_COMM_LEN)))
                TRACE_FIELD(pid_t, next_pid, next->pid)
                TRACE_FIELD(int, next_prio, next->prio)
        ),
        TP_RAW_FMT("prev %d:%d ==> next %s:%d:%d")
        );

The above method is hard to read and requires two format fields.

The new method:

 /*
  * Tracepoint for task switches, performed by the scheduler:
  *
  * (NOTE: the 'rq' argument is not used by generic trace events,
  *        but used by the latency tracer plugin. )
  */
 TRACE_EVENT(sched_switch,

	TP_PROTO(struct rq *rq, struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(rq, prev, next),

	TP_STRUCT__entry(
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	prev_pid			)
		__field(	int,	prev_prio			)
		__array(	char,	next_comm,	TASK_COMM_LEN	)
		__field(	pid_t,	next_pid			)
		__field(	int,	next_prio			)
	),

	TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
		__entry->next_comm, __entry->next_pid, __entry->next_prio),

	TP_fast_assign(
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_pid	= prev->pid;
		__entry->prev_prio	= prev->prio;
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		__entry->next_pid	= next->pid;
		__entry->next_prio	= next->prio;
	)
 );

This macro is called TRACE_EVENT, it is broken up into 5 parts:

 TP_PROTO:         the prototype of the trace point
 TP_ARGS:          the arguments of the trace point
 TP_STRUCT__entry: the structure layout of the entry in the ring buffer
 TP_printk:        the printk format
 TP_fast_assign:   the method used to write the entry into the ring buffer

The structure is the definition of how the event will be saved in the
ring buffer. The printk is used by the internal tracing in case of
an oops, when the kernel needs to print out the format of the record
to the console. Thus the TP_printk gives a means to show the records
in a human readable format. It is also used to print out the data
from the trace file.

The TP_fast_assign is executed directly. It is basically like a C function,
where the __entry is the handle to the record.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2bfb7d11fc1..c5e1d8865fe 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -751,12 +751,7 @@ struct ftrace_event_call {
 	int		(*regfunc)(void);
 	void		(*unregfunc)(void);
 	int		id;
-	struct dentry	*raw_dir;
-	int		raw_enabled;
-	int		type;
 	int		(*raw_init)(void);
-	int		(*raw_reg)(void);
-	void		(*raw_unreg)(void);
 	int		(*show_format)(struct trace_seq *s);
 };
 
-- 
cgit v1.2.3


From 1852fcce181faa237c010a3dbedb473cf9d4555f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 11 Mar 2009 14:33:00 -0400
Subject: tracing: expand the ring buffers when an event is activated

To save memory, the tracer ring buffers are set to a minimum.
The activating of a trace expands the ring buffer size. This patch
adds this expanding, when an event is activated.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c5e1d8865fe..336324d717f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -737,6 +737,9 @@ static inline void trace_branch_disable(void)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
+/* set ring buffers to default size if not already done so */
+int tracing_update_buffers(void);
+
 /* trace event type bit fields, not numeric */
 enum {
 	TRACE_EVENT_TYPE_PRINTF		= 1,
-- 
cgit v1.2.3


From 48ead02030f849d011259244bb4ea9b985479006 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 12 Mar 2009 18:24:49 +0100
Subject: tracing/core: bring back raw trace_printk for dynamic formats strings

Impact: fix callsites with dynamic format strings

Since its new binary implementation, trace_printk() internally uses static
containers for the format strings at each callsite. But the value is
assigned once at build time, which means that it can't take dynamic
formats.

So this patch unearths the raw trace_printk implementation for the callers
that will need trace_printk to be able to carry these dynamic format
strings. The trace_printk() macro will use the appropriate implementation
for each callsite. Most of the time however, the binary implementation will
still be used.

The other impact of this patch is that mmiotrace_printk() will use the old
implementation because it calls the low level trace_vprintk and we can't
guess here whether the format passed in it is dynamic or not.

Some parts of this patch have been written by Steven Rostedt (most notably
the part that chooses the appropriate implementation for each callsite).

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 336324d717f..cede1ab49d0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,6 +20,7 @@ enum trace_type {
 	TRACE_WAKE,
 	TRACE_STACK,
 	TRACE_PRINT,
+	TRACE_BPRINT,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -117,7 +118,7 @@ struct userstack_entry {
 /*
  * trace_printk entry:
  */
-struct print_entry {
+struct bprint_entry {
 	struct trace_entry	ent;
 	unsigned long		ip;
 	int			depth;
@@ -125,6 +126,13 @@ struct print_entry {
 	u32			buf[];
 };
 
+struct print_entry {
+	struct trace_entry	ent;
+	unsigned long		ip;
+	int			depth;
+	char			buf[];
+};
+
 #define TRACE_OLD_SIZE		88
 
 struct trace_field_cont {
@@ -286,6 +294,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
+		IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT);	\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
@@ -570,6 +579,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 extern void *head_page(struct trace_array_cpu *data);
 extern long ns2usecs(cycle_t nsec);
 extern int
+trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+extern int
 trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
-- 
cgit v1.2.3


From e9fb2b6d5845e24f104713591286b6f39761c027 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 12 Mar 2009 14:19:25 -0400
Subject: tracing: have event_trace_printk use static tracer

Impact: speed up on event tracing

The event_trace_printk is currently a wrapper function that calls
trace_vprintk. Because it uses a variable for the fmt it misses out
on the optimization of using the binary printk.

This patch makes event_trace_printk into a macro wrapper to use the
fmt as the same as the trace_printks.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cede1ab49d0..35cfa7bbaf3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -773,4 +773,21 @@ void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
 
+extern const char *__start___trace_bprintk_fmt[];
+extern const char *__stop___trace_bprintk_fmt[];
+
+#define event_trace_printk(ip, fmt, args...)				\
+do {									\
+	__trace_printk_check_format(fmt, ##args);			\
+	tracing_record_cmdline(current);				\
+	if (__builtin_constant_p(fmt)) {				\
+		static const char *trace_printk_fmt			\
+		  __attribute__((section("__trace_printk_fmt"))) =	\
+			__builtin_constant_p(fmt) ? fmt : NULL;		\
+									\
+		__trace_bprintk(ip, trace_printk_fmt, ##args);		\
+	} else								\
+		__trace_printk(ip, fmt, ##args);			\
+} while (0)
+
 #endif /* _LINUX_KERNEL_TRACE_H */
-- 
cgit v1.2.3


From bdc067582b8b71c7771bab076bbc51569c594fb4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 13 Mar 2009 00:12:52 -0400
Subject: tracing: add comment for use of double __builtin_consant_p

Impact: documentation

The use of the double __builtin_constant_p checks in the event_trace_printk
can be confusing to developers and reviewers. This patch adds a comment
to explain why it is there.

Requested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
LKML-Reference: <20090313122235.43EB.A69D9226@jp.fujitsu.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 35cfa7bbaf3..67595b8f0f1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -776,6 +776,11 @@ extern struct ftrace_event_call __stop_ftrace_events[];
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
 #define event_trace_printk(ip, fmt, args...)				\
 do {									\
 	__trace_printk_check_format(fmt, ##args);			\
-- 
cgit v1.2.3


From ee08c6eccb7d1295516f7cf420fddf7b14e9146f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 7 Mar 2009 05:52:59 +0100
Subject: tracing/ftrace: syscall tracing infrastructure, basics

Provide basic callbacks to do syscall tracing.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <1236401580-5758-2-git-send-email-fweisbec@gmail.com>
[ simplified it to a trace_printk() for now. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c5e1d8865fe..3d49daae47d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -30,6 +30,8 @@ enum trace_type {
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
 	TRACE_HW_BRANCHES,
+	TRACE_SYSCALL_ENTER,
+	TRACE_SYSCALL_EXIT,
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_POWER,
-- 
cgit v1.2.3


From bed1ffca022cc876fb83161d26670e9b5d3cf36b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 13 Mar 2009 15:42:11 +0100
Subject: tracing/syscalls: core infrastructure for syscalls tracing,
 enhancements

Impact: new feature

This adds the generic support for syscalls tracing. This is
currently exploited through a devoted tracer but other tracing
engines can use it. (They just have to play with
{start,stop}_ftrace_syscalls() and use the display callbacks
unless they want to override them.)

The syscall prototype definitions are abused here to steal
some metadata information:

- syscall name, param types, param names, number of params

The syscall addr is not directly saved during this definition
because we don't know if its prototype is available in the
namespace. But we don't really need it. The arch has just to
build a function able to resolve the syscall number to its
metadata struct.

The current tracer prints the syscall names, parameter names
and values (and their types optionally). Currently the value is
a raw hex, but displaying higher-level values is on my TODO list.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1236955332-10133-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3d49daae47d..d80ca0d464d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -194,6 +194,19 @@ struct kmemtrace_free_entry {
 	const void *ptr;
 };
 
+struct syscall_trace_enter {
+	struct trace_entry	ent;
+	int			nr;
+	unsigned long		args[];
+};
+
+struct syscall_trace_exit {
+	struct trace_entry	ent;
+	int			nr;
+	unsigned long		ret;
+};
+
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -306,6 +319,10 @@ extern void __ftrace_bad_type(void);
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
 			  TRACE_KMEM_FREE);	\
+		IF_ASSIGN(var, ent, struct syscall_trace_enter,		\
+			  TRACE_SYSCALL_ENTER);				\
+		IF_ASSIGN(var, ent, struct syscall_trace_exit,		\
+			  TRACE_SYSCALL_EXIT);				\
 		__ftrace_bad_type();					\
 	} while (0)
 
-- 
cgit v1.2.3


From 4ca530852346be239b7c19e7bec5d2b78855bebe Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 16 Mar 2009 19:20:15 -0400
Subject: tracing: protect reader of cmdline output

Impact: fix to one cause of incorrect comm outputs in trace

The spinlock only protected the creation of a comm <=> pid pair.
But it was possible that a reader could look up a pid, and get the
wrong comm because it had no locking.

This also required changing trace_find_cmdline to copy the comm cache
and not just send back a pointer to it.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 56ce34d90b0..b0ecad8ecc3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -547,7 +547,7 @@ struct tracer_switch_ops {
 };
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
 
-extern char *trace_find_cmdline(int pid);
+extern void trace_find_cmdline(int pid, char comm[]);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
-- 
cgit v1.2.3


From af4617bdba34aa556272b34c3986b0a4d588f568 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 17 Mar 2009 18:09:55 -0400
Subject: tracing: add global-clock option to provide cross CPU clock to traces

Impact: feature to allow better serialized clock

This patch adds an option called "global-clock" that will allow
the tracer to switch to a slower but more accurate (across CPUs)
clock.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b0ecad8ecc3..26a7a28ca11 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -667,6 +667,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_PRINTK_MSGONLY	= 0x10000,
 	TRACE_ITER_CONTEXT_INFO		= 0x20000, /* Print pid/cpu/time */
 	TRACE_ITER_LATENCY_FMT		= 0x40000,
+	TRACE_ITER_GLOBAL_CLK		= 0x80000,
 };
 
 /*
-- 
cgit v1.2.3


From 40ce74f19c28077550646c76d96a075bf312e461 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 19 Mar 2009 14:03:53 -0400
Subject: tracing: remove recording function depth from trace_printk

The function depth in trace_printk was to facilitate the function
graph output. Now that the function graph calculates the depth within
the trace output, we no longer need to record the depth when the
trace_printk is called.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 38276d1638e..7c9a0cbf5dc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -123,7 +123,6 @@ struct userstack_entry {
 struct bprint_entry {
 	struct trace_entry	ent;
 	unsigned long		ip;
-	int			depth;
 	const char		*fmt;
 	u32			buf[];
 };
@@ -131,7 +130,6 @@ struct bprint_entry {
 struct print_entry {
 	struct trace_entry	ent;
 	unsigned long		ip;
-	int			depth;
 	char			buf[];
 };
 
@@ -598,9 +596,9 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 extern void *head_page(struct trace_array_cpu *data);
 extern long ns2usecs(cycle_t nsec);
 extern int
-trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
 extern int
-trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
-- 
cgit v1.2.3


From ac199db0189c091f2863312061c0575937f68810 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 19 Mar 2009 20:26:15 +0100
Subject: ftrace: event profile hooks

Impact: new tracing infrastructure feature

Provide infrastructure to generate software perf counter events
from tracepoints.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <20090319194233.557364871@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7c9a0cbf5dc..7cfb741be20 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -785,12 +785,23 @@ struct ftrace_event_call {
 	int		id;
 	int		(*raw_init)(void);
 	int		(*show_format)(struct trace_seq *s);
+
+#ifdef CONFIG_EVENT_PROFILE
+	atomic_t	profile_count;
+	int		(*profile_enable)(struct ftrace_event_call *);
+	void		(*profile_disable)(struct ftrace_event_call *);
+#endif
 };
 
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
 
+#define for_each_event(event)						\
+	for (event = __start_ftrace_events;				\
+	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+	     event++)
+
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
-- 
cgit v1.2.3


From cf027f645e6aee4f0ca6197a6b6a57f327fdb13f Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Sun, 22 Mar 2009 03:30:39 -0500
Subject: tracing: add run-time field descriptions for event filtering

This patch makes the field descriptions defined for event tracing
available at run-time, for the event-filtering mechanism introduced
in a subsequent patch.

The common event fields are prepended with 'common_' in the format
display, allowing them to be distinguished from the other fields
that might internally have same name and can therefore be
unambiguously used in filters.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1237710639.7703.46.camel@charm-linux>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7cfb741be20..9288dc7ad14 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -775,16 +775,26 @@ enum {
 	TRACE_EVENT_TYPE_RAW		= 2,
 };
 
+struct ftrace_event_field {
+	struct list_head	link;
+	char			*name;
+	char			*type;
+	int			offset;
+	int			size;
+};
+
 struct ftrace_event_call {
-	char		*name;
-	char		*system;
-	struct dentry	*dir;
-	int		enabled;
-	int		(*regfunc)(void);
-	void		(*unregfunc)(void);
-	int		id;
-	int		(*raw_init)(void);
-	int		(*show_format)(struct trace_seq *s);
+	char			*name;
+	char			*system;
+	struct dentry		*dir;
+	int			enabled;
+	int			(*regfunc)(void);
+	void			(*unregfunc)(void);
+	int			id;
+	int			(*raw_init)(void);
+	int			(*show_format)(struct trace_seq *s);
+	int			(*define_fields)(void);
+	struct list_head	fields;
 
 #ifdef CONFIG_EVENT_PROFILE
 	atomic_t	profile_count;
@@ -793,6 +803,8 @@ struct ftrace_event_call {
 #endif
 };
 
+int trace_define_field(struct ftrace_event_call *call, char *type,
+		       char *name, int offset, int size);
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
-- 
cgit v1.2.3


From 7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Sun, 22 Mar 2009 03:31:04 -0500
Subject: tracing: add per-event filtering

This patch adds per-event filtering to the event tracing subsystem.

It adds a 'filter' debugfs file to each event directory.  This file can
be written to to set filters; reading from it will display the current
set of filters set for that event.

Basically, any field listed in the 'format' file for an event can be
filtered on (including strings, but not yet other array types) using
either matching ('==') or non-matching ('!=') 'predicates'.  A
'predicate' can be either a single expression:

 # echo pid != 0 > filter

 # cat filter
 pid != 0

or a compound expression of up to 8 sub-expressions combined using '&&'
or '||':

 # echo comm == Xorg > filter
 # echo "&& sig != 29" > filter

 # cat filter
 comm == Xorg
 && sig != 29

Only events having field values matching an expression will be available
in the trace output; non-matching events are discarded.

Note that a compound expression is built up by echoing each
sub-expression separately - it's not the most efficient way to do
things, but it keeps the parser simple and assumes that compound
expressions will be relatively uncommon.  In any case, a subsequent
patch introducing a way to set filters for entire subsystems should
mitigate any need to do this for lots of events.

Setting a filter without an '&&' or '||' clears the previous filter
completely and sets the filter to the new expression:

 # cat filter
 comm == Xorg
 && sig != 29

 # echo comm != Xorg > filter

 # cat filter
 comm != Xorg

To clear a filter, echo 0 to the filter file:

 # echo 0 > filter
 # cat filter
 none

The limit of 8 predicates for a compound expression is arbitrary - for
efficiency, it's implemented as an array of pointers to predicates, and
8 seemed more than enough for any filter...

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1237710665.7703.48.camel@charm-linux>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9288dc7ad14..d9eb39e4bb3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -795,6 +795,7 @@ struct ftrace_event_call {
 	int			(*show_format)(struct trace_seq *s);
 	int			(*define_fields)(void);
 	struct list_head	fields;
+	struct filter_pred	**preds;
 
 #ifdef CONFIG_EVENT_PROFILE
 	atomic_t	profile_count;
@@ -803,8 +804,35 @@ struct ftrace_event_call {
 #endif
 };
 
+#define MAX_FILTER_PRED 8
+
+struct filter_pred;
+
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+
+struct filter_pred {
+	filter_pred_fn_t fn;
+	u64 val;
+	char *str_val;
+	int str_len;
+	char *field_name;
+	int offset;
+	int not;
+	int or;
+	int compound;
+	int clear;
+};
+
 int trace_define_field(struct ftrace_event_call *call, char *type,
 		       char *name, int offset, int size);
+extern void filter_free_pred(struct filter_pred *pred);
+extern int filter_print_preds(struct filter_pred **preds, char *buf);
+extern int filter_parse(char **pbuf, struct filter_pred *pred);
+extern int filter_add_pred(struct ftrace_event_call *call,
+			   struct filter_pred *pred);
+extern void filter_free_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
 extern struct ftrace_event_call __stop_ftrace_events[];
-- 
cgit v1.2.3


From cfb180f3e71b2a280a254c8646a9ab1beab63f84 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Sun, 22 Mar 2009 03:31:17 -0500
Subject: tracing: add per-subsystem filtering

This patch adds per-subsystem filtering to the event tracing subsystem.

It adds a 'filter' debugfs file to each subsystem directory.  This file
can be written to to set filters; reading from it will display the
current set of filters set for that subsystem.

Basically what it does is propagate the filter down to each event
contained in the subsystem.  If a particular event doesn't have a field
with the name specified in the filter, it simply doesn't get set for
that event.  You can verify whether or not the filter was set for a
particular event by looking at the filter file for that event.

As with per-event filters, compound expressions are supported, echoing
'0' to the subsystem's filter file clears all filters in the subsystem,
etc.

Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1237710677.7703.49.camel@charm-linux>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d9eb39e4bb3..f267723c3c5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -804,6 +804,18 @@ struct ftrace_event_call {
 #endif
 };
 
+struct event_subsystem {
+	struct list_head	list;
+	const char		*name;
+	struct dentry		*entry;
+	struct filter_pred	**preds;
+};
+
+#define events_for_each(event)						\
+	for (event = __start_ftrace_events;				\
+	     (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+	     event++)
+
 #define MAX_FILTER_PRED 8
 
 struct filter_pred;
@@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call,
 			   struct filter_pred *pred);
 extern void filter_free_preds(struct ftrace_event_call *call);
 extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern void filter_free_subsystem_preds(struct event_subsystem *system);
+extern int filter_add_subsystem_pred(struct event_subsystem *system,
+				     struct filter_pred *pred);
 
 void event_trace_printk(unsigned long ip, const char *fmt, ...);
 extern struct ftrace_event_call __start_ftrace_events[];
-- 
cgit v1.2.3


From 07edf7121374609709ef1b0889f6e7b8d6a62ec1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 22 Mar 2009 23:10:46 +0100
Subject: tracing/events: don't use wake up for events

Impact: fix hard-lockup with sched switch events

Some ftrace events, such as sched wakeup, can be traced
while the runqueue lock is held. Since they are using
trace_current_buffer_unlock_commit(), they call wake_up()
which can try to grab the runqueue lock too, resulting in
a deadlock.

Now, for all events, we call a new helper:
trace_nowake_buffer_unlock_commit(), which does pretty much the same as
trace_current_buffer_unlock_commit() except that it doesn't call
trace_wake_up().

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1237759847-21025-4-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f267723c3c5..54fd9bcd0a6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -483,6 +483,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
 				  unsigned long flags, int pc);
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
 					unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+					unsigned long flags, int pc);
 
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 						struct trace_array_cpu *data);
-- 
cgit v1.2.3


From 4bda2d517bfa3ce3d7044e06988cdddae7adffe2 Mon Sep 17 00:00:00 2001
From: Tom Zanussi <tzanussi@gmail.com>
Date: Tue, 24 Mar 2009 02:14:31 -0500
Subject: tracing/filters: use trace_seq_printf() to print filters

Impact: cleanup

Instead of just using the trace_seq buffer to print the filters, use
trace_seq_printf() as it was intended to be used.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Tom Zanussi <tzanussi@gmail.com>
Cc: =?ISO-8859-1?Q?Fr=E9d=E9ric?= Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1237878871.8339.59.camel@charm-linux>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 54fd9bcd0a6..90a848debcb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -840,7 +840,8 @@ struct filter_pred {
 int trace_define_field(struct ftrace_event_call *call, char *type,
 		       char *name, int offset, int size);
 extern void filter_free_pred(struct filter_pred *pred);
-extern int filter_print_preds(struct filter_pred **preds, char *buf);
+extern void filter_print_preds(struct filter_pred **preds,
+			       struct trace_seq *s);
 extern int filter_parse(char **pbuf, struct filter_pred *pred);
 extern int filter_add_pred(struct ftrace_event_call *call,
 			   struct filter_pred *pred);
-- 
cgit v1.2.3


From be6f164a02f394675e2ac2077dd354cebef5b4c0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 24 Mar 2009 11:06:24 -0400
Subject: function-graph: add option for include sleep times

Impact: give user a choice to show times spent while sleeping

The user may want to see the time a function spent sleeping.
This patch adds the trace option "sleep-time" to allow that.
The "sleep-time" option is default on.

 echo sleep-time > /debug/tracing/trace_options

produces:

 ------------------------------------------
 2)  avahi-d-3428  =>    <idle>-0
 ------------------------------------------

 2)               |      finish_task_switch() {
 2)   0.621 us    |        _spin_unlock_irq();
 2)   2.202 us    |      }
 2) ! 1002.197 us |    }
 2) ! 1003.521 us |  }

whereas,

 echo nosleep-time > /debug/tracing/trace_options

produces:

 0)    <idle>-0    =>  yum-upd-3416
 ------------------------------------------

 0)               |              finish_task_switch() {
 0)   0.643 us    |                _spin_unlock_irq();
 0)   2.342 us    |              }
 0) + 41.302 us   |            }
 0) + 42.453 us   |          }

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/trace/trace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 7cfb741be20..d7410bbb9a8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -683,6 +683,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_CONTEXT_INFO		= 0x20000, /* Print pid/cpu/time */
 	TRACE_ITER_LATENCY_FMT		= 0x40000,
 	TRACE_ITER_GLOBAL_CLK		= 0x80000,
+	TRACE_ITER_SLEEP_TIME		= 0x100000,
 };
 
 /*
-- 
cgit v1.2.3


From ca2b84cb3c4a0d4d2143b46ec072cdff5d1b3b87 Mon Sep 17 00:00:00 2001
From: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Date: Mon, 23 Mar 2009 15:12:24 +0200
Subject: kmemtrace: use tracepoints

kmemtrace now uses tracepoints instead of markers. We no longer need to
use format specifiers to pass arguments.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
[ folded: Use the new TP_PROTO and TP_ARGS to fix the build.     ]
[ folded: fix build when CONFIG_KMEMTRACE is disabled.           ]
[ folded: define tracepoints when CONFIG_TRACEPOINTS is enabled. ]
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <ae61c0f37156db8ec8dc0d5778018edde60a92e3.1237813499.git.eduard.munteanu@linux360.ro>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cb0ce3fc36d..cbc168f1e43 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -182,6 +182,12 @@ struct trace_power {
 	struct power_trace	state_data;
 };
 
+enum kmemtrace_type_id {
+	KMEMTRACE_TYPE_KMALLOC = 0,	/* kmalloc() or kfree(). */
+	KMEMTRACE_TYPE_CACHE,		/* kmem_cache_*(). */
+	KMEMTRACE_TYPE_PAGES,		/* __get_free_pages() and friends. */
+};
+
 struct kmemtrace_alloc_entry {
 	struct trace_entry	ent;
 	enum kmemtrace_type_id type_id;
-- 
cgit v1.2.3


From cf8e3474654f20433aab9aa35826d43b5f245008 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 30 Mar 2009 13:48:00 +0800
Subject: tracing: fix incorrect return type of ns2usecs()

Impact: fix time output bug in 32bits system

ns2usecs() returns 'long', which is incorrect.

(In i386)
...
          <idle>-0     [000]   521.442100: _spin_lock <-tick_do_update_jiffies64
          <idle>-0     [000]   521.442101: do_timer <-tick_do_update_jiffies64
          <idle>-0     [000]   521.442102: update_wall_time <-do_timer
          <idle>-0     [000]   521.442102: update_xtime_cache <-update_wall_time
....
(It always prints a time less than 2200 seconds besides ...)
Because 'long' is 32bits in i386. ( (1<<31) useconds is about 2200 seconds)

...
          <idle>-0     [001] 4154502640.134759: rcu_bh_qsctr_inc <-__do_softirq
          <idle>-0     [001] 4154502640.134760: _local_bh_enable <-__do_softirq
          <idle>-0     [001] 4154502640.134761: idle_cpu <-irq_exit
...
(very large value)
Because 'long' is a signed type and it is 32bits in i386.

Changes in v2:
return 'unsigned long long' instead of 'cycle_t'

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <49D05D10.4030009@cn.fujitsu.com>
Reported-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace/trace.h')

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cb0ce3fc36d..0d81a4a2a4a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -596,7 +596,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
 extern int
 trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
 extern int
-- 
cgit v1.2.3