aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig16
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/blktrace.c496
-rw-r--r--kernel/trace/events.c1
-rw-r--r--kernel/trace/ftrace.c116
-rw-r--r--kernel/trace/kmemtrace.c319
-rw-r--r--kernel/trace/ring_buffer.c196
-rw-r--r--kernel/trace/trace.c274
-rw-r--r--kernel/trace/trace.h120
-rw-r--r--kernel/trace/trace_clock.c1
-rw-r--r--kernel/trace/trace_event_profile.c31
-rw-r--r--kernel/trace/trace_event_types.h2
-rw-r--r--kernel/trace/trace_events.c252
-rw-r--r--kernel/trace/trace_events_filter.c427
-rw-r--r--kernel/trace/trace_events_stage_2.h45
-rw-r--r--kernel/trace/trace_events_stage_3.h68
-rw-r--r--kernel/trace/trace_functions_graph.c239
-rw-r--r--kernel/trace/trace_mmiotrace.c2
-rw-r--r--kernel/trace/trace_nop.c1
-rw-r--r--kernel/trace/trace_output.c74
-rw-r--r--kernel/trace/trace_output.h44
-rw-r--r--kernel/trace/trace_power.c8
-rw-r--r--kernel/trace/trace_printk.c8
-rw-r--r--kernel/trace/trace_sched_switch.c9
-rw-r--r--kernel/trace/trace_selftest.c80
-rw-r--r--kernel/trace/trace_stat.c47
-rw-r--r--kernel/trace/trace_syscalls.c171
-rw-r--r--kernel/trace/trace_workqueue.c12
28 files changed, 2299 insertions, 762 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 95a0ad191f1..2246141bda4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -63,7 +63,11 @@ config TRACING
#
config TRACING_SUPPORT
bool
- depends on TRACE_IRQFLAGS_SUPPORT
+ # PPC32 has no irqflags tracing support, but it can use most of the
+ # tracers anyway, they were tested to build and work. Note that new
+ # exceptions to this list aren't welcomed, better implement the
+ # irqflags tracing for your architecture.
+ depends on TRACE_IRQFLAGS_SUPPORT || PPC32
depends on STACKTRACE_SUPPORT
default y
@@ -95,11 +99,10 @@ config FUNCTION_GRAPH_TRACER
help
Enable the kernel to trace a function at both its return
and its entry.
- It's first purpose is to trace the duration of functions and
- draw a call graph for each thread with some informations like
- the return value.
- This is done by setting the current return address on the current
- task structure into a stack of calls.
+ Its first purpose is to trace the duration of functions and
+ draw a call graph for each thread with some information like
+ the return value. This is done by setting the current return
+ address on the current task structure into a stack of calls.
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
@@ -182,6 +185,7 @@ config FTRACE_SYSCALLS
bool "Trace syscalls"
depends on HAVE_FTRACE_SYSCALLS
select TRACING
+ select KALLSYMS
help
Basic tracer to catch the syscall entry and exit events.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c3feea01c3e..2630f5121ec 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -44,5 +44,7 @@ obj-$(CONFIG_EVENT_TRACER) += trace_events.o
obj-$(CONFIG_EVENT_TRACER) += events.o
obj-$(CONFIG_EVENT_TRACER) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
+obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1f32e4edf49..947c5b3f90c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
static unsigned int blktrace_seq __read_mostly = 1;
static struct trace_array *blk_tr;
-static int __read_mostly blk_tracer_enabled;
+static bool blk_tracer_enabled __read_mostly;
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
};
/* Global reference count of probes */
-static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);
-static int blk_register_tracepoints(void);
+static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
/*
@@ -60,22 +59,39 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
const void *data, size_t len)
{
struct blk_io_trace *t;
+ struct ring_buffer_event *event = NULL;
+ int pc = 0;
+ int cpu = smp_processor_id();
+ bool blk_tracer = blk_tracer_enabled;
+
+ if (blk_tracer) {
+ pc = preempt_count();
+ event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
+ sizeof(*t) + len,
+ 0, pc);
+ if (!event)
+ return;
+ t = ring_buffer_event_data(event);
+ goto record_it;
+ }
if (!bt->rchan)
return;
t = relay_reserve(bt->rchan, sizeof(*t) + len);
if (t) {
- const int cpu = smp_processor_id();
-
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->time = ktime_to_ns(ktime_get());
+record_it:
t->device = bt->dev;
t->action = action;
t->pid = pid;
t->cpu = cpu;
t->pdu_len = len;
memcpy((void *) t + sizeof(*t), data, len);
+
+ if (blk_tracer)
+ trace_buffer_unlock_commit(blk_tr, event, 0, pc);
}
}
@@ -111,14 +127,8 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
unsigned long flags;
char *buf;
- if (blk_tr) {
- va_start(args, fmt);
- ftrace_vprintk(fmt, args);
- va_end(args);
- return;
- }
-
- if (!bt->msg_data)
+ if (unlikely(bt->trace_state != Blktrace_running &&
+ !blk_tracer_enabled))
return;
local_irq_save(flags);
@@ -148,8 +158,8 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
/*
* Data direction bit lookup
*/
-static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
- BLK_TC_ACT(BLK_TC_WRITE) };
+static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
+ BLK_TC_ACT(BLK_TC_WRITE) };
/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
@@ -169,9 +179,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
unsigned long *sequence;
pid_t pid;
int cpu, pc = 0;
+ bool blk_tracer = blk_tracer_enabled;
- if (unlikely(bt->trace_state != Blktrace_running ||
- !blk_tracer_enabled))
+ if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
return;
what |= ddir_act[rw & WRITE];
@@ -186,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
cpu = raw_smp_processor_id();
- if (blk_tr) {
+ if (blk_tracer) {
tracing_record_cmdline(current);
pc = preempt_count();
@@ -236,7 +246,7 @@ record_it:
if (pdu_len)
memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
- if (blk_tr) {
+ if (blk_tracer) {
trace_buffer_unlock_commit(blk_tr, event, 0, pc);
return;
}
@@ -248,7 +258,7 @@ record_it:
static struct dentry *blk_tree_root;
static DEFINE_MUTEX(blk_tree_mutex);
-static void blk_trace_cleanup(struct blk_trace *bt)
+static void blk_trace_free(struct blk_trace *bt)
{
debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
@@ -256,10 +266,13 @@ static void blk_trace_cleanup(struct blk_trace *bt)
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
- mutex_lock(&blk_probe_mutex);
+}
+
+static void blk_trace_cleanup(struct blk_trace *bt)
+{
+ blk_trace_free(bt);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
int blk_trace_remove(struct request_queue *q)
@@ -270,8 +283,7 @@ int blk_trace_remove(struct request_queue *q)
if (!bt)
return -EINVAL;
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped)
+ if (bt->trace_state != Blktrace_running)
blk_trace_cleanup(bt);
return 0;
@@ -414,11 +426,11 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (buts->name[i] == '/')
buts->name[i] = '_';
- ret = -ENOMEM;
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- goto err;
+ return -ENOMEM;
+ ret = -ENOMEM;
bt->sequence = alloc_percpu(unsigned long);
if (!bt->sequence)
goto err;
@@ -429,11 +441,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = -ENOENT;
+ mutex_lock(&blk_tree_mutex);
if (!blk_tree_root) {
blk_tree_root = debugfs_create_dir("block", NULL);
- if (!blk_tree_root)
- return -ENOMEM;
+ if (!blk_tree_root) {
+ mutex_unlock(&blk_tree_mutex);
+ goto err;
+ }
}
+ mutex_unlock(&blk_tree_mutex);
dir = debugfs_create_dir(buts->name, blk_tree_root);
@@ -471,14 +487,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- mutex_lock(&blk_probe_mutex);
- if (atomic_add_return(1, &blk_probes_ref) == 1) {
- ret = blk_register_tracepoints();
- if (ret)
- goto probe_err;
- }
- mutex_unlock(&blk_probe_mutex);
-
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt) {
@@ -486,22 +494,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
goto err;
}
+ if (atomic_inc_return(&blk_probes_ref) == 1)
+ blk_register_tracepoints();
+
return 0;
-probe_err:
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
err:
- if (bt) {
- if (bt->msg_file)
- debugfs_remove(bt->msg_file);
- if (bt->dropped_file)
- debugfs_remove(bt->dropped_file);
- free_percpu(bt->sequence);
- free_percpu(bt->msg_data);
- if (bt->rchan)
- relay_close(bt->rchan);
- kfree(bt);
- }
+ blk_trace_free(bt);
return ret;
}
@@ -863,7 +861,7 @@ void blk_add_driver_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
-static int blk_register_tracepoints(void)
+static void blk_register_tracepoints(void)
{
int ret;
@@ -901,7 +899,6 @@ static int blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_remap(blk_add_trace_remap);
WARN_ON(ret);
- return 0;
}
static void blk_unregister_tracepoints(void)
@@ -934,25 +931,31 @@ static void blk_unregister_tracepoints(void)
static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
{
int i = 0;
+ int tc = t->action >> BLK_TC_SHIFT;
+
+ if (t->action == BLK_TN_MESSAGE) {
+ rwbs[i++] = 'N';
+ goto out;
+ }
- if (t->action & BLK_TC_DISCARD)
+ if (tc & BLK_TC_DISCARD)
rwbs[i++] = 'D';
- else if (t->action & BLK_TC_WRITE)
+ else if (tc & BLK_TC_WRITE)
rwbs[i++] = 'W';
else if (t->bytes)
rwbs[i++] = 'R';
else
rwbs[i++] = 'N';
- if (t->action & BLK_TC_AHEAD)
+ if (tc & BLK_TC_AHEAD)
rwbs[i++] = 'A';
- if (t->action & BLK_TC_BARRIER)
+ if (tc & BLK_TC_BARRIER)
rwbs[i++] = 'B';
- if (t->action & BLK_TC_SYNC)
+ if (tc & BLK_TC_SYNC)
rwbs[i++] = 'S';
- if (t->action & BLK_TC_META)
+ if (tc & BLK_TC_META)
rwbs[i++] = 'M';
-
+out:
rwbs[i] = '\0';
}
@@ -979,7 +982,7 @@ static inline unsigned long long t_sector(const struct trace_entry *ent)
static inline __u16 t_error(const struct trace_entry *ent)
{
- return te_blk_io_trace(ent)->sector;
+ return te_blk_io_trace(ent)->error;
}
static __u64 get_pdu_int(const struct trace_entry *ent)
@@ -999,35 +1002,39 @@ static void get_pdu_remap(const struct trace_entry *ent,
r->sector = be64_to_cpu(sector);
}
-static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
+typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
+
+static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
{
char rwbs[6];
- unsigned long long ts = ns2usecs(iter->ts);
- unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
+ unsigned long long ts = iter->ts;
+ unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
unsigned secs = (unsigned long)ts;
- const struct trace_entry *ent = iter->ent;
- const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
+ const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
return trace_seq_printf(&iter->seq,
- "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
+ "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ",
MAJOR(t->device), MINOR(t->device), iter->cpu,
- secs, usec_rem, ent->pid, act, rwbs);
+ secs, nsec_rem, iter->ent->pid, act, rwbs);
}
-static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
- const char *act)
+static int blk_log_action(struct trace_iterator *iter, const char *act)
{
char rwbs[6];
+ const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
+
fill_rwbs(rwbs, t);
- return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
+ return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
MAJOR(t->device), MINOR(t->device), act, rwbs);
}
static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
{
- const char *cmd = trace_find_cmdline(ent->pid);
+ char cmd[TASK_COMM_LEN];
+
+ trace_find_cmdline(ent->pid, cmd);
if (t_sec(ent))
return trace_seq_printf(s, "%llu + %u [%s]\n",
@@ -1057,19 +1064,41 @@ static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
{
- return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
+ char cmd[TASK_COMM_LEN];
+
+ trace_find_cmdline(ent->pid, cmd);
+
+ return trace_seq_printf(s, "[%s]\n", cmd);
}
static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
{
- return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
- get_pdu_int(ent));
+ char cmd[TASK_COMM_LEN];
+
+ trace_find_cmdline(ent->pid, cmd);
+
+ return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent));
}
static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
{
+ char cmd[TASK_COMM_LEN];
+
+ trace_find_cmdline(ent->pid, cmd);
+
return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
- get_pdu_int(ent), trace_find_cmdline(ent->pid));
+ get_pdu_int(ent), cmd);
+}
+
+static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
+{
+ int ret;
+ const struct blk_io_trace *t = te_blk_io_trace(ent);
+
+ ret = trace_seq_putmem(s, t + 1, t->pdu_len);
+ if (ret)
+ return trace_seq_putc(s, '\n');
+ return ret;
}
/*
@@ -1086,11 +1115,7 @@ static void blk_tracer_print_header(struct seq_file *m)
static void blk_tracer_start(struct trace_array *tr)
{
- mutex_lock(&blk_probe_mutex);
- if (atomic_add_return(1, &blk_probes_ref) == 1)
- if (blk_register_tracepoints())
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
+ blk_tracer_enabled = true;
trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
}
@@ -1098,38 +1123,24 @@ static int blk_tracer_init(struct trace_array *tr)
{
blk_tr = tr;
blk_tracer_start(tr);
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled++;
- mutex_unlock(&blk_probe_mutex);
return 0;
}
static void blk_tracer_stop(struct trace_array *tr)
{
+ blk_tracer_enabled = false;
trace_flags |= TRACE_ITER_CONTEXT_INFO;
- mutex_lock(&blk_probe_mutex);
- if (atomic_dec_and_test(&blk_probes_ref))
- blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
static void blk_tracer_reset(struct trace_array *tr)
{
- if (!atomic_read(&blk_probes_ref))
- return;
-
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled--;
- WARN_ON(blk_tracer_enabled < 0);
- mutex_unlock(&blk_probe_mutex);
-
blk_tracer_stop(tr);
}
-static struct {
+static const struct {
const char *act[2];
int (*print)(struct trace_seq *s, const struct trace_entry *ent);
-} what2act[] __read_mostly = {
+} what2act[] = {
[__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
[__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
[__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
@@ -1147,29 +1158,48 @@ static struct {
[__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
};
-static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
- int flags)
+static enum print_line_t print_one_line(struct trace_iterator *iter,
+ bool classic)
{
struct trace_seq *s = &iter->seq;
- const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
- const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+ const struct blk_io_trace *t;
+ u16 what;
int ret;
+ bool long_act;
+ blk_log_action_t *log_action;
- if (!trace_print_context(iter))
- return TRACE_TYPE_PARTIAL_LINE;
+ t = te_blk_io_trace(iter->ent);
+ what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+ long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
+ log_action = classic ? &blk_log_action_classic : &blk_log_action;
- if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
+ if (t->action == BLK_TN_MESSAGE) {
+ ret = log_action(iter, long_act ? "message" : "m");
+ if (ret)
+ ret = blk_log_msg(s, iter->ent);
+ goto out;
+ }
+
+ if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
ret = trace_seq_printf(s, "Bad pc action %x\n", what);
else {
- const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
- ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
+ ret = log_action(iter, what2act[what].act[long_act]);
if (ret)
ret = what2act[what].print(s, iter->ent);
}
-
+out:
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
}
+static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
+ int flags)
+{
+ if (!trace_print_context(iter))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return print_one_line(iter, false);
+}
+
static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
@@ -1177,7 +1207,7 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
const int offset = offsetof(struct blk_io_trace, sector);
struct blk_io_trace old = {
.magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
- .time = ns2usecs(iter->ts),
+ .time = iter->ts,
};
if (!trace_seq_putmem(s, &old, offset))
@@ -1195,26 +1225,10 @@ blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
{
- const struct blk_io_trace *t;
- u16 what;
- int ret;
-
if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
return TRACE_TYPE_UNHANDLED;
- t = (const struct blk_io_trace *)iter->ent;
- what = t->action & ((1 << BLK_TC_SHIFT) - 1);
-
- if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
- ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
- else {
- const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
- ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
- if (ret)
- ret = what2act[what].print(&iter->seq, iter->ent);
- }
-
- return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+ return print_one_line(iter, true);
}
static struct tracer blk_tracer __read_mostly = {
@@ -1260,7 +1274,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL)
return -EINVAL;
- kfree(bt);
+ if (atomic_dec_and_test(&blk_probes_ref))
+ blk_unregister_tracepoints();
+
+ blk_trace_free(bt);
return 0;
}
@@ -1270,26 +1287,33 @@ static int blk_trace_remove_queue(struct request_queue *q)
static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
{
struct blk_trace *old_bt, *bt = NULL;
- int ret;
+ int ret = -ENOMEM;
- ret = -ENOMEM;
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- goto err;
+ return -ENOMEM;
+
+ bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
+ if (!bt->msg_data)
+ goto free_bt;
bt->dev = dev;
bt->act_mask = (u16)-1;
bt->end_lba = -1ULL;
- bt->trace_state = Blktrace_running;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt != NULL) {
(void)xchg(&q->blk_trace, old_bt);
- kfree(bt);
ret = -EBUSY;
+ goto free_bt;
}
+
+ if (atomic_inc_return(&blk_probes_ref) == 1)
+ blk_register_tracepoints();
return 0;
-err:
+
+free_bt:
+ blk_trace_free(bt);
return ret;
}
@@ -1297,72 +1321,6 @@ err:
* sysfs interface to enable and configure tracing
*/
-static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct block_device *bdev;
- ssize_t ret = -ENXIO;
-
- lock_kernel();
- bdev = bdget(part_devt(p));
- if (bdev != NULL) {
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q != NULL) {
- mutex_lock(&bdev->bd_mutex);
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
- mutex_unlock(&bdev->bd_mutex);
- }
-
- bdput(bdev);
- }
-
- unlock_kernel();
- return ret;
-}
-
-static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct block_device *bdev;
- struct request_queue *q;
- struct hd_struct *p;
- int value;
- ssize_t ret = -ENXIO;
-
- if (count == 0 || sscanf(buf, "%d", &value) != 1)
- goto out;
-
- lock_kernel();
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out_unlock_kernel;
-
- q = bdev_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
-
- mutex_lock(&bdev->bd_mutex);
- if (value)
- ret = blk_trace_setup_queue(q, bdev->bd_dev);
- else
- ret = blk_trace_remove_queue(q);
- mutex_unlock(&bdev->bd_mutex);
-
- if (ret == 0)
- ret = count;
-out_bdput:
- bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
-out:
- return ret;
-}
-
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf);
@@ -1374,8 +1332,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
sysfs_blk_trace_attr_show, \
sysfs_blk_trace_attr_store)
-static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
- sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1395,53 +1352,85 @@ struct attribute_group blk_trace_attr_group = {
.attrs = blk_trace_attrs,
};
-static int blk_str2act_mask(const char *str)
+static const struct {
+ int mask;
+ const char *str;
+} mask_maps[] = {
+ { BLK_TC_READ, "read" },
+ { BLK_TC_WRITE, "write" },
+ { BLK_TC_BARRIER, "barrier" },
+ { BLK_TC_SYNC, "sync" },
+ { BLK_TC_QUEUE, "queue" },
+ { BLK_TC_REQUEUE, "requeue" },
+ { BLK_TC_ISSUE, "issue" },
+ { BLK_TC_COMPLETE, "complete" },
+ { BLK_TC_FS, "fs" },
+ { BLK_TC_PC, "pc" },
+ { BLK_TC_AHEAD, "ahead" },
+ { BLK_TC_META, "meta" },
+ { BLK_TC_DISCARD, "discard" },
+ { BLK_TC_DRV_DATA, "drv_data" },
+};
+
+static int blk_trace_str2mask(const char *str)
{
+ int i;
int mask = 0;
- char *copy = kstrdup(str, GFP_KERNEL), *s;
+ char *s, *token;
- if (copy == NULL)
+ s = kstrdup(str, GFP_KERNEL);
+ if (s == NULL)
return -ENOMEM;
-
- s = strstrip(copy);
+ s = strstrip(s);
while (1) {
- char *sep = strchr(s, ',');
-
- if (sep != NULL)
- *sep = '\0';
-
- if (strcasecmp(s, "barrier") == 0)
- mask |= BLK_TC_BARRIER;
- else if (strcasecmp(s, "complete") == 0)
- mask |= BLK_TC_COMPLETE;
- else if (strcasecmp(s, "fs") == 0)
- mask |= BLK_TC_FS;
- else if (strcasecmp(s, "issue") == 0)
- mask |= BLK_TC_ISSUE;
- else if (strcasecmp(s, "pc") == 0)
- mask |= BLK_TC_PC;
- else if (strcasecmp(s, "queue") == 0)
- mask |= BLK_TC_QUEUE;
- else if (strcasecmp(s, "read") == 0)
- mask |= BLK_TC_READ;
- else if (strcasecmp(s, "requeue") == 0)
- mask |= BLK_TC_REQUEUE;
- else if (strcasecmp(s, "sync") == 0)
- mask |= BLK_TC_SYNC;
- else if (strcasecmp(s, "write") == 0)
- mask |= BLK_TC_WRITE;
-
- if (sep == NULL)
+ token = strsep(&s, ",");
+ if (token == NULL)
break;
- s = sep + 1;
+ if (*token == '\0')
+ continue;
+
+ for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
+ if (strcasecmp(token, mask_maps[i].str) == 0) {
+ mask |= mask_maps[i].mask;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE(mask_maps)) {
+ mask = -EINVAL;
+ break;
+ }
}
- kfree(copy);
+ kfree(s);
return mask;
}
+static ssize_t blk_trace_mask2str(char *buf, int mask)
+{
+ int i;
+ char *p = buf;
+
+ for (i = 0; i < ARRAY_SIZE(mask_maps); i++) {
+ if (mask & mask_maps[i].mask) {
+ p += sprintf(p, "%s%s",
+ (p == buf) ? "" : ",", mask_maps[i].str);
+ }
+ }
+ *p++ = '\n';
+
+ return p - buf;
+}
+
+static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
+{
+ if (bdev->bd_disk == NULL)
+ return NULL;
+
+ return bdev_get_queue(bdev);
+}
+
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1456,20 +1445,29 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
+
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ goto out_unlock_bdev;
+ }
+
if (q->blk_trace == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
- ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
+ ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
else if (attr == &dev_attr_pid)
ret = sprintf(buf, "%u\n", q->blk_trace->pid);
else if (attr == &dev_attr_start_lba)
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
else if (attr == &dev_attr_end_lba)
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
@@ -1486,7 +1484,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
struct request_queue *q;
struct hd_struct *p;
u64 value;
- ssize_t ret = -ENXIO;
+ ssize_t ret = -EINVAL;
if (count == 0)
goto out;
@@ -1494,24 +1492,36 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (attr == &dev_attr_act_mask) {
if (sscanf(buf, "%llx", &value) != 1) {
/* Assume it is a list of trace category names */
- value = blk_str2act_mask(buf);
- if (value < 0)
+ ret = blk_trace_str2mask(buf);
+ if (ret < 0)
goto out;
+ value = ret;
}
} else if (sscanf(buf, "%llu", &value) != 1)
goto out;
+ ret = -ENXIO;
+
lock_kernel();
p = dev_to_part(dev);
bdev = bdget(part_devt(p));
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ if (value)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+ else
+ ret = blk_trace_remove_queue(q);
+ goto out_unlock_bdev;
+ }
+
ret = 0;
if (q->blk_trace == NULL)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1525,13 +1535,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
q->blk_trace->start_lba = value;
else if (attr == &dev_attr_end_lba)
q->blk_trace->end_lba = value;
- ret = count;
}
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
out_unlock_kernel:
unlock_kernel();
out:
- return ret;
+ return ret ? ret : count;
}
+
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
index 9fc918da404..246f2aa6dc4 100644
--- a/kernel/trace/events.c
+++ b/kernel/trace/events.c
@@ -12,4 +12,3 @@
#include "trace_events_stage_2.h"
#include "trace_events_stage_3.h"
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d33d306bdcf..f1ed080406c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,6 +29,8 @@
#include <linux/list.h>
#include <linux/hash.h>
+#include <trace/sched.h>
+
#include <asm/ftrace.h>
#include "trace.h"
@@ -272,7 +274,7 @@ enum {
static int ftrace_filtered;
-static LIST_HEAD(ftrace_new_addrs);
+static struct dyn_ftrace *ftrace_new_addrs;
static DEFINE_MUTEX(ftrace_regex_lock);
@@ -339,7 +341,7 @@ static inline int record_frozen(struct dyn_ftrace *rec)
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
- rec->ip = (unsigned long)ftrace_free_records;
+ rec->freelist = ftrace_free_records;
ftrace_free_records = rec;
rec->flags |= FTRACE_FL_FREE;
}
@@ -356,8 +358,14 @@ void ftrace_release(void *start, unsigned long size)
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
- if ((rec->ip >= s) && (rec->ip < e))
+ if ((rec->ip >= s) && (rec->ip < e)) {
+ /*
+ * rec->ip is changed in ftrace_free_rec()
+ * It should not between s and e if record was freed.
+ */
+ FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
ftrace_free_rec(rec);
+ }
} while_for_each_ftrace_rec();
mutex_unlock(&ftrace_lock);
}
@@ -376,7 +384,7 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
return NULL;
}
- ftrace_free_records = (void *)rec->ip;
+ ftrace_free_records = rec->freelist;
memset(rec, 0, sizeof(*rec));
return rec;
}
@@ -408,8 +416,8 @@ ftrace_record_ip(unsigned long ip)
return NULL;
rec->ip = ip;
-
- list_add(&rec->list, &ftrace_new_addrs);
+ rec->newlist = ftrace_new_addrs;
+ ftrace_new_addrs = rec;
return rec;
}
@@ -531,11 +539,12 @@ static void ftrace_replace_code(int enable)
do_for_each_ftrace_rec(pg, rec) {
/*
- * Skip over free records and records that have
- * failed.
+ * Skip over free records, records that have
+ * failed and not converted.
*/
if (rec->flags & FTRACE_FL_FREE ||
- rec->flags & FTRACE_FL_FAILED)
+ rec->flags & FTRACE_FL_FAILED ||
+ !(rec->flags & FTRACE_FL_CONVERTED))
continue;
/* ignore updates to this record's mcount site */
@@ -547,7 +556,7 @@ static void ftrace_replace_code(int enable)
}
failed = __ftrace_replace_code(rec, enable);
- if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
+ if (failed) {
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
!core_kernel_text(rec->ip)) {
@@ -714,19 +723,21 @@ unsigned long ftrace_update_tot_cnt;
static int ftrace_update_code(struct module *mod)
{
- struct dyn_ftrace *p, *t;
+ struct dyn_ftrace *p;
cycle_t start, stop;
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
- list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
+ while (ftrace_new_addrs) {
/* If something went wrong, bail without enabling anything */
if (unlikely(ftrace_disabled))
return -1;
- list_del_init(&p->list);
+ p = ftrace_new_addrs;
+ ftrace_new_addrs = p->newlist;
+ p->flags = 0L;
/* convert record (i.e, patch mcount-call with NOP) */
if (ftrace_code_disable(mod, p)) {
@@ -1118,16 +1129,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
return ftrace_regex_open(inode, file, 0);
}
-static ssize_t
-ftrace_regex_read(struct file *file, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- if (file->f_mode & FMODE_READ)
- return seq_read(file, ubuf, cnt, ppos);
- else
- return -EPERM;
-}
-
static loff_t
ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
{
@@ -1880,7 +1881,7 @@ static const struct file_operations ftrace_failures_fops = {
static const struct file_operations ftrace_filter_fops = {
.open = ftrace_filter_open,
- .read = ftrace_regex_read,
+ .read = seq_read,
.write = ftrace_filter_write,
.llseek = ftrace_regex_lseek,
.release = ftrace_filter_release,
@@ -1888,7 +1889,7 @@ static const struct file_operations ftrace_filter_fops = {
static const struct file_operations ftrace_notrace_fops = {
.open = ftrace_notrace_open,
- .read = ftrace_regex_read,
+ .read = seq_read,
.write = ftrace_notrace_write,
.llseek = ftrace_regex_lseek,
.release = ftrace_notrace_release,
@@ -1990,16 +1991,6 @@ ftrace_graph_open(struct inode *inode, struct file *file)
return ret;
}
-static ssize_t
-ftrace_graph_read(struct file *file, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- if (file->f_mode & FMODE_READ)
- return seq_read(file, ubuf, cnt, ppos);
- else
- return -EPERM;
-}
-
static int
ftrace_set_func(unsigned long *array, int *idx, char *buffer)
{
@@ -2130,7 +2121,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
static const struct file_operations ftrace_graph_fops = {
.open = ftrace_graph_open,
- .read = ftrace_graph_read,
+ .read = seq_read,
.write = ftrace_graph_write,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -2278,7 +2269,7 @@ ftrace_pid_read(struct file *file, char __user *ubuf,
if (ftrace_pid_trace == ftrace_swapper_pid)
r = sprintf(buf, "swapper tasks\n");
else if (ftrace_pid_trace)
- r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
+ r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
else
r = sprintf(buf, "no pid\n");
@@ -2606,6 +2597,38 @@ free:
return ret;
}
+static void
+ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev,
+ struct task_struct *next)
+{
+ unsigned long long timestamp;
+ int index;
+
+ /*
+ * Does the user want to count the time a function was asleep.
+ * If so, do not update the time stamps.
+ */
+ if (trace_flags & TRACE_ITER_SLEEP_TIME)
+ return;
+
+ timestamp = trace_clock_local();
+
+ prev->ftrace_timestamp = timestamp;
+
+ /* only process tasks that we timestamped */
+ if (!next->ftrace_timestamp)
+ return;
+
+ /*
+ * Update all the counters in next to make up for the
+ * time next was sleeping.
+ */
+ timestamp -= next->ftrace_timestamp;
+
+ for (index = next->curr_ret_stack; index >= 0; index--)
+ next->ret_stack[index].calltime += timestamp;
+}
+
/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
@@ -2627,6 +2650,13 @@ static int start_graph_tracing(void)
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
+ if (!ret) {
+ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch);
+ if (ret)
+ pr_info("ftrace_graph: Couldn't activate tracepoint"
+ " probe to kernel_sched_switch\n");
+ }
+
kfree(ret_stack_list);
return ret;
}
@@ -2659,6 +2689,12 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
mutex_lock(&ftrace_lock);
+ /* we currently allow only one tracer registered at a time */
+ if (atomic_read(&ftrace_graph_active)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
register_pm_notifier(&ftrace_suspend_notifier);
@@ -2683,12 +2719,17 @@ void unregister_ftrace_graph(void)
{
mutex_lock(&ftrace_lock);
+ if (!unlikely(atomic_read(&ftrace_graph_active)))
+ goto out;
+
atomic_dec(&ftrace_graph_active);
+ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
+ out:
mutex_unlock(&ftrace_lock);
}
@@ -2704,6 +2745,7 @@ void ftrace_graph_init_task(struct task_struct *t)
t->curr_ret_stack = -1;
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
} else
t->ret_stack = NULL;
}
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index ae201b3eda8..5011f4d91e3 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -6,14 +6,16 @@
* Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
*/
-#include <linux/dcache.h>
+#include <linux/tracepoint.h>
+#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/dcache.h>
#include <linux/fs.h>
-#include <linux/seq_file.h>
+
#include <trace/kmemtrace.h>
-#include "trace.h"
#include "trace_output.h"
+#include "trace.h"
/* Select an alternative, minimalistic output than the original one */
#define TRACE_KMEM_OPT_MINIMAL 0x1
@@ -25,14 +27,156 @@ static struct tracer_opt kmem_opts[] = {
};
static struct tracer_flags kmem_tracer_flags = {
- .val = 0,
- .opts = kmem_opts
+ .val = 0,
+ .opts = kmem_opts
};
-
-static bool kmem_tracing_enabled __read_mostly;
static struct trace_array *kmemtrace_array;
+/* Trace allocations */
+static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+ struct trace_array *tr = kmemtrace_array;
+ struct kmemtrace_alloc_entry *entry;
+ struct ring_buffer_event *event;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+
+ entry->ent.type = TRACE_KMEM_ALLOC;
+ entry->type_id = type_id;
+ entry->call_site = call_site;
+ entry->ptr = ptr;
+ entry->bytes_req = bytes_req;
+ entry->bytes_alloc = bytes_alloc;
+ entry->gfp_flags = gfp_flags;
+ entry->node = node;
+
+ ring_buffer_unlock_commit(tr->buffer, event);
+
+ trace_wake_up();
+}
+
+static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr)
+{
+ struct trace_array *tr = kmemtrace_array;
+ struct kmemtrace_free_entry *entry;
+ struct ring_buffer_event *event;
+
+ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, 0, 0);
+
+ entry->ent.type = TRACE_KMEM_FREE;
+ entry->type_id = type_id;
+ entry->call_site = call_site;
+ entry->ptr = ptr;
+
+ ring_buffer_unlock_commit(tr->buffer, event);
+
+ trace_wake_up();
+}
+
+static void kmemtrace_kmalloc(unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags)
+{
+ kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags)
+{
+ kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+static void kmemtrace_kmalloc_node(unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+ kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, node);
+}
+
+static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+ kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, node);
+}
+
+static void kmemtrace_kfree(unsigned long call_site, const void *ptr)
+{
+ kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
+}
+
+static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr)
+{
+ kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
+}
+
+static int kmemtrace_start_probes(void)
+{
+ int err;
+
+ err = register_trace_kmalloc(kmemtrace_kmalloc);
+ if (err)
+ return err;
+ err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
+ if (err)
+ return err;
+ err = register_trace_kmalloc_node(kmemtrace_kmalloc_node);
+ if (err)
+ return err;
+ err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
+ if (err)
+ return err;
+ err = register_trace_kfree(kmemtrace_kfree);
+ if (err)
+ return err;
+ err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
+
+ return err;
+}
+
+static void kmemtrace_stop_probes(void)
+{
+ unregister_trace_kmalloc(kmemtrace_kmalloc);
+ unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
+ unregister_trace_kmalloc_node(kmemtrace_kmalloc_node);
+ unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
+ unregister_trace_kfree(kmemtrace_kfree);
+ unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
+}
+
static int kmem_trace_init(struct trace_array *tr)
{
int cpu;
@@ -41,14 +185,14 @@ static int kmem_trace_init(struct trace_array *tr)
for_each_cpu_mask(cpu, cpu_possible_map)
tracing_reset(tr, cpu);
- kmem_tracing_enabled = true;
+ kmemtrace_start_probes();
return 0;
}
static void kmem_trace_reset(struct trace_array *tr)
{
- kmem_tracing_enabled = false;
+ kmemtrace_stop_probes();
}
static void kmemtrace_headers(struct seq_file *s)
@@ -66,47 +210,84 @@ static void kmemtrace_headers(struct seq_file *s)
}
/*
- * The two following functions give the original output from kmemtrace,
- * or something close to....perhaps they need some missing things
+ * The following functions give the original output from kmemtrace,
+ * plus the origin CPU, since reordering occurs in-kernel now.
*/
+
+#define KMEMTRACE_USER_ALLOC 0
+#define KMEMTRACE_USER_FREE 1
+
+struct kmemtrace_user_event {
+ u8 event_id;
+ u8 type_id;
+ u16 event_size;
+ u32 cpu;
+ u64 timestamp;
+ unsigned long call_site;
+ unsigned long ptr;
+};
+
+struct kmemtrace_user_event_alloc {
+ size_t bytes_req;
+ size_t bytes_alloc;
+ unsigned gfp_flags;
+ int node;
+};
+
static enum print_line_t
-kmemtrace_print_alloc_original(struct trace_iterator *iter,
- struct kmemtrace_alloc_entry *entry)
+kmemtrace_print_alloc_user(struct trace_iterator *iter,
+ struct kmemtrace_alloc_entry *entry)
{
+ struct kmemtrace_user_event_alloc *ev_alloc;
struct trace_seq *s = &iter->seq;
- int ret;
+ struct kmemtrace_user_event *ev;
+
+ ev = trace_seq_reserve(s, sizeof(*ev));
+ if (!ev)
+ return TRACE_TYPE_PARTIAL_LINE;
- /* Taken from the old linux/kmemtrace.h */
- ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
- "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
- entry->type_id, entry->call_site, (unsigned long) entry->ptr,
- (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
- (unsigned long) entry->gfp_flags, entry->node);
+ ev->event_id = KMEMTRACE_USER_ALLOC;
+ ev->type_id = entry->type_id;
+ ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
+ ev->cpu = iter->cpu;
+ ev->timestamp = iter->ts;
+ ev->call_site = entry->call_site;
+ ev->ptr = (unsigned long)entry->ptr;
- if (!ret)
+ ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
+ if (!ev_alloc)
return TRACE_TYPE_PARTIAL_LINE;
+ ev_alloc->bytes_req = entry->bytes_req;
+ ev_alloc->bytes_alloc = entry->bytes_alloc;
+ ev_alloc->gfp_flags = entry->gfp_flags;
+ ev_alloc->node = entry->node;
+
return TRACE_TYPE_HANDLED;
}
static enum print_line_t
-kmemtrace_print_free_original(struct trace_iterator *iter,
- struct kmemtrace_free_entry *entry)
+kmemtrace_print_free_user(struct trace_iterator *iter,
+ struct kmemtrace_free_entry *entry)
{
struct trace_seq *s = &iter->seq;
- int ret;
+ struct kmemtrace_user_event *ev;
- /* Taken from the old linux/kmemtrace.h */
- ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
- entry->type_id, entry->call_site, (unsigned long) entry->ptr);
-
- if (!ret)
+ ev = trace_seq_reserve(s, sizeof(*ev));
+ if (!ev)
return TRACE_TYPE_PARTIAL_LINE;
+ ev->event_id = KMEMTRACE_USER_FREE;
+ ev->type_id = entry->type_id;
+ ev->event_size = sizeof(*ev);
+ ev->cpu = iter->cpu;
+ ev->timestamp = iter->ts;
+ ev->call_site = entry->call_site;
+ ev->ptr = (unsigned long)entry->ptr;
+
return TRACE_TYPE_HANDLED;
}
-
/* The two other following provide a more minimalistic output */
static enum print_line_t
kmemtrace_print_alloc_compress(struct trace_iterator *iter,
@@ -178,7 +359,7 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
static enum print_line_t
kmemtrace_print_free_compress(struct trace_iterator *iter,
- struct kmemtrace_free_entry *entry)
+ struct kmemtrace_free_entry *entry)
{
struct trace_seq *s = &iter->seq;
int ret;
@@ -239,20 +420,22 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
switch (entry->type) {
case TRACE_KMEM_ALLOC: {
struct kmemtrace_alloc_entry *field;
+
trace_assign_type(field, entry);
if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
return kmemtrace_print_alloc_compress(iter, field);
else
- return kmemtrace_print_alloc_original(iter, field);
+ return kmemtrace_print_alloc_user(iter, field);
}
case TRACE_KMEM_FREE: {
struct kmemtrace_free_entry *field;
+
trace_assign_type(field, entry);
if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
return kmemtrace_print_free_compress(iter, field);
else
- return kmemtrace_print_free_original(iter, field);
+ return kmemtrace_print_free_user(iter, field);
}
default:
@@ -260,70 +443,13 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
}
}
-/* Trace allocations */
-void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- struct ring_buffer_event *event;
- struct kmemtrace_alloc_entry *entry;
- struct trace_array *tr = kmemtrace_array;
-
- if (!kmem_tracing_enabled)
- return;
-
- event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
- sizeof(*entry), 0, 0);
- if (!event)
- return;
- entry = ring_buffer_event_data(event);
-
- entry->call_site = call_site;
- entry->ptr = ptr;
- entry->bytes_req = bytes_req;
- entry->bytes_alloc = bytes_alloc;
- entry->gfp_flags = gfp_flags;
- entry->node = node;
-
- trace_buffer_unlock_commit(tr, event, 0, 0);
-}
-EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
-
-void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
- unsigned long call_site,
- const void *ptr)
-{
- struct ring_buffer_event *event;
- struct kmemtrace_free_entry *entry;
- struct trace_array *tr = kmemtrace_array;
-
- if (!kmem_tracing_enabled)
- return;
-
- event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
- sizeof(*entry), 0, 0);
- if (!event)
- return;
- entry = ring_buffer_event_data(event);
- entry->type_id = type_id;
- entry->call_site = call_site;
- entry->ptr = ptr;
-
- trace_buffer_unlock_commit(tr, event, 0, 0);
-}
-EXPORT_SYMBOL(kmemtrace_mark_free);
-
static struct tracer kmem_tracer __read_mostly = {
- .name = "kmemtrace",
- .init = kmem_trace_init,
- .reset = kmem_trace_reset,
- .print_line = kmemtrace_print_line,
- .print_header = kmemtrace_headers,
- .flags = &kmem_tracer_flags
+ .name = "kmemtrace",
+ .init = kmem_trace_init,
+ .reset = kmem_trace_reset,
+ .print_line = kmemtrace_print_line,
+ .print_header = kmemtrace_headers,
+ .flags = &kmem_tracer_flags
};
void kmemtrace_init(void)
@@ -335,5 +461,4 @@ static int __init init_kmem_tracer(void)
{
return register_tracer(&kmem_tracer);
}
-
device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 58128ad2fde..960cbf44c84 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -180,48 +180,74 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
#include "trace.h"
-/* Up this if you want to test the TIME_EXTENTS and normalization */
-#define DEBUG_SHIFT 0
+#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
+#define RB_ALIGNMENT 4U
+#define RB_MAX_SMALL_DATA 28
-u64 ring_buffer_time_stamp(int cpu)
+enum {
+ RB_LEN_TIME_EXTEND = 8,
+ RB_LEN_TIME_STAMP = 16,
+};
+
+static inline int rb_null_event(struct ring_buffer_event *event)
{
- u64 time;
+ return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+}
- preempt_disable_notrace();
- /* shift to debug/test normalization and TIME_EXTENTS */
- time = trace_clock_local() << DEBUG_SHIFT;
- preempt_enable_no_resched_notrace();
+static inline int rb_discarded_event(struct ring_buffer_event *event)
+{
+ return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+}
- return time;
+static void rb_event_set_padding(struct ring_buffer_event *event)
+{
+ event->type = RINGBUF_TYPE_PADDING;
+ event->time_delta = 0;
}
-EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
-void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+/**
+ * ring_buffer_event_discard - discard an event in the ring buffer
+ * @buffer: the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes a event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
{
- /* Just stupid testing the normalize function and deltas */
- *ts >>= DEBUG_SHIFT;
+ event->type = RINGBUF_TYPE_PADDING;
+ /* time delta must be non zero */
+ if (!event->time_delta)
+ event->time_delta = 1;
}
-EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
-#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
-#define RB_ALIGNMENT 4U
-#define RB_MAX_SMALL_DATA 28
+static unsigned
+rb_event_data_length(struct ring_buffer_event *event)
+{
+ unsigned length;
-enum {
- RB_LEN_TIME_EXTEND = 8,
- RB_LEN_TIME_STAMP = 16,
-};
+ if (event->len)
+ length = event->len * RB_ALIGNMENT;
+ else
+ length = event->array[0];
+ return length + RB_EVNT_HDR_SIZE;
+}
/* inline for ring buffer fast paths */
static unsigned
rb_event_length(struct ring_buffer_event *event)
{
- unsigned length;
-
switch (event->type) {
case RINGBUF_TYPE_PADDING:
- /* undefined */
- return -1;
+ if (rb_null_event(event))
+ /* undefined */
+ return -1;
+ return rb_event_data_length(event);
case RINGBUF_TYPE_TIME_EXTEND:
return RB_LEN_TIME_EXTEND;
@@ -230,11 +256,7 @@ rb_event_length(struct ring_buffer_event *event)
return RB_LEN_TIME_STAMP;
case RINGBUF_TYPE_DATA:
- if (event->len)
- length = event->len * RB_ALIGNMENT;
- else
- length = event->array[0];
- return length + RB_EVNT_HDR_SIZE;
+ return rb_event_data_length(event);
default:
BUG();
}
@@ -374,6 +396,7 @@ struct ring_buffer {
#ifdef CONFIG_HOTPLUG_CPU
struct notifier_block cpu_notify;
#endif
+ u64 (*clock)(void);
};
struct ring_buffer_iter {
@@ -394,6 +417,30 @@ struct ring_buffer_iter {
_____ret; \
})
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+ u64 time;
+
+ preempt_disable_notrace();
+ /* shift to debug/test normalization and TIME_EXTENTS */
+ time = buffer->clock() << DEBUG_SHIFT;
+ preempt_enable_no_resched_notrace();
+
+ return time;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
+
+void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
+ int cpu, u64 *ts)
+{
+ /* Just stupid testing the normalize function and deltas */
+ *ts >>= DEBUG_SHIFT;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
+
/**
* check_pages - integrity check of buffer pages
* @cpu_buffer: CPU buffer with pages to test
@@ -516,7 +563,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
struct list_head *head = &cpu_buffer->pages;
struct buffer_page *bpage, *tmp;
- list_del_init(&cpu_buffer->reader_page->list);
free_buffer_page(cpu_buffer->reader_page);
list_for_each_entry_safe(bpage, tmp, head, list) {
@@ -533,8 +579,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
extern int ring_buffer_page_too_big(void);
#ifdef CONFIG_HOTPLUG_CPU
-static int __cpuinit rb_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu);
+static int rb_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu);
#endif
/**
@@ -569,13 +615,23 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
buffer->flags = flags;
+ buffer->clock = trace_clock_local;
/* need at least two pages */
if (buffer->pages == 1)
buffer->pages++;
+ /*
+ * In case of non-hotplug cpu, if the ring-buffer is allocated
+ * in early initcall, it will not be notified of secondary cpus.
+ * In that off case, we need to allocate for all possible cpus.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
get_online_cpus();
cpumask_copy(buffer->cpumask, cpu_online_mask);
+#else
+ cpumask_copy(buffer->cpumask, cpu_possible_mask);
+#endif
buffer->cpus = nr_cpu_ids;
bsize = sizeof(void *) * nr_cpu_ids;
@@ -645,6 +701,12 @@ ring_buffer_free(struct ring_buffer *buffer)
}
EXPORT_SYMBOL_GPL(ring_buffer_free);
+void ring_buffer_set_clock(struct ring_buffer *buffer,
+ u64 (*clock)(void))
+{
+ buffer->clock = clock;
+}
+
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
static void
@@ -827,11 +889,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
-static inline int rb_null_event(struct ring_buffer_event *event)
-{
- return event->type == RINGBUF_TYPE_PADDING;
-}
-
static inline void *
__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
{
@@ -1191,7 +1248,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
cpu_buffer->tail_page = next_page;
/* reread the time stamp */
- *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+ *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
cpu_buffer->tail_page->page->time_stamp = *ts;
}
@@ -1201,7 +1258,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (tail < BUF_PAGE_SIZE) {
/* Mark the rest of the page with padding */
event = __rb_page_index(tail_page, tail);
- event->type = RINGBUF_TYPE_PADDING;
+ rb_event_set_padding(event);
}
if (tail <= BUF_PAGE_SIZE)
@@ -1334,7 +1391,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
return NULL;
- ts = ring_buffer_time_stamp(cpu_buffer->cpu);
+ ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
/*
* Only the first commit can update the timestamp.
@@ -1951,7 +2008,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
event = rb_reader_event(cpu_buffer);
- if (event->type == RINGBUF_TYPE_DATA)
+ if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
cpu_buffer->entries--;
rb_update_read_stamp(cpu_buffer, event);
@@ -2034,9 +2091,18 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
switch (event->type) {
case RINGBUF_TYPE_PADDING:
- RB_WARN_ON(cpu_buffer, 1);
+ if (rb_null_event(event))
+ RB_WARN_ON(cpu_buffer, 1);
+ /*
+ * Because the writer could be discarding every
+ * event it creates (which would probably be bad)
+ * if we were to go back to "again" then we may never
+ * catch up, and will trigger the warn on, or lock
+ * the box. Return the padding, and we will release
+ * the current locks, and try again.
+ */
rb_advance_reader(cpu_buffer);
- return NULL;
+ return event;
case RINGBUF_TYPE_TIME_EXTEND:
/* Internal data, OK to advance */
@@ -2051,7 +2117,8 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
case RINGBUF_TYPE_DATA:
if (ts) {
*ts = cpu_buffer->read_stamp + event->time_delta;
- ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+ ring_buffer_normalize_time_stamp(buffer,
+ cpu_buffer->cpu, ts);
}
return event;
@@ -2096,8 +2163,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
switch (event->type) {
case RINGBUF_TYPE_PADDING:
- rb_inc_iter(iter);
- goto again;
+ if (rb_null_event(event)) {
+ rb_inc_iter(iter);
+ goto again;
+ }
+ rb_advance_iter(iter);
+ return event;
case RINGBUF_TYPE_TIME_EXTEND:
/* Internal data, OK to advance */
@@ -2112,7 +2183,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
case RINGBUF_TYPE_DATA:
if (ts) {
*ts = iter->read_stamp + event->time_delta;
- ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
+ ring_buffer_normalize_time_stamp(buffer,
+ cpu_buffer->cpu, ts);
}
return event;
@@ -2143,10 +2215,16 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
+ again:
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
event = rb_buffer_peek(buffer, cpu, ts);
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ if (event && event->type == RINGBUF_TYPE_PADDING) {
+ cpu_relax();
+ goto again;
+ }
+
return event;
}
@@ -2165,10 +2243,16 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_event *event;
unsigned long flags;
+ again:
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
event = rb_iter_peek(iter, ts);
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ if (event && event->type == RINGBUF_TYPE_PADDING) {
+ cpu_relax();
+ goto again;
+ }
+
return event;
}
@@ -2187,6 +2271,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
struct ring_buffer_event *event = NULL;
unsigned long flags;
+ again:
/* might be called in atomic */
preempt_disable();
@@ -2208,6 +2293,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
out:
preempt_enable();
+ if (event && event->type == RINGBUF_TYPE_PADDING) {
+ cpu_relax();
+ goto again;
+ }
+
return event;
}
EXPORT_SYMBOL_GPL(ring_buffer_consume);
@@ -2286,6 +2376,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
unsigned long flags;
+ again:
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
event = rb_iter_peek(iter, ts);
if (!event)
@@ -2295,6 +2386,11 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
out:
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ if (event && event->type == RINGBUF_TYPE_PADDING) {
+ cpu_relax();
+ goto again;
+ }
+
return event;
}
EXPORT_SYMBOL_GPL(ring_buffer_read);
@@ -2764,8 +2860,8 @@ static __init int rb_init_debugfs(void)
fs_initcall(rb_init_debugfs);
#ifdef CONFIG_HOTPLUG_CPU
-static int __cpuinit rb_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int rb_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
{
struct ring_buffer *buffer =
container_of(self, struct ring_buffer, cpu_notify);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index efe3202c020..a0174a40c56 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -155,13 +155,6 @@ ns2usecs(cycle_t nsec)
return nsec;
}
-cycle_t ftrace_now(int cpu)
-{
- u64 ts = ring_buffer_time_stamp(cpu);
- ring_buffer_normalize_time_stamp(cpu, &ts);
- return ts;
-}
-
/*
* The global_trace is the descriptor that holds the tracing
* buffers for the live tracing. For each CPU, it contains
@@ -178,6 +171,20 @@ static struct trace_array global_trace;
static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
+cycle_t ftrace_now(int cpu)
+{
+ u64 ts;
+
+ /* Early boot up does not have a buffer yet */
+ if (!global_trace.buffer)
+ return trace_clock_local();
+
+ ts = ring_buffer_time_stamp(global_trace.buffer, cpu);
+ ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts);
+
+ return ts;
+}
+
/*
* The max_tr is used to snapshot the global_trace when a maximum
* latency is reached. Some tracers will use this to store a maximum
@@ -248,7 +255,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
- TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
+ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
/**
* trace_wake_up - wake up tasks waiting for trace input
@@ -308,6 +315,8 @@ static const char *trace_options[] = {
"printk-msg-only",
"context-info",
"latency-format",
+ "global-clock",
+ "sleep-time",
NULL
};
@@ -374,7 +383,7 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
return cnt;
}
-ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
+static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
int len;
void *ret;
@@ -633,6 +642,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
}
#define SAVED_CMDLINES 128
+#define NO_CMDLINE_MAP UINT_MAX
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
@@ -644,8 +654,8 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
static void trace_init_cmdlines(void)
{
- memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
+ memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
+ memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
cmdline_idx = 0;
}
@@ -737,8 +747,7 @@ void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned map;
- unsigned idx;
+ unsigned pid, idx;
if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
return;
@@ -753,13 +762,20 @@ static void trace_save_cmdline(struct task_struct *tsk)
return;
idx = map_pid_to_cmdline[tsk->pid];
- if (idx >= SAVED_CMDLINES) {
+ if (idx == NO_CMDLINE_MAP) {
idx = (cmdline_idx + 1) % SAVED_CMDLINES;
- map = map_cmdline_to_pid[idx];
- if (map <= PID_MAX_DEFAULT)
- map_pid_to_cmdline[map] = (unsigned)-1;
+ /*
+ * Check whether the cmdline buffer at idx has a pid
+ * mapped. We are going to overwrite that entry so we
+ * need to clear the map_pid_to_cmdline. Otherwise we
+ * would read the new comm for the old pid.
+ */
+ pid = map_cmdline_to_pid[idx];
+ if (pid != NO_CMDLINE_MAP)
+ map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+ map_cmdline_to_pid[idx] = tsk->pid;
map_pid_to_cmdline[tsk->pid] = idx;
cmdline_idx = idx;
@@ -770,30 +786,34 @@ static void trace_save_cmdline(struct task_struct *tsk)
__raw_spin_unlock(&trace_cmdline_lock);
}
-char *trace_find_cmdline(int pid)
+void trace_find_cmdline(int pid, char comm[])
{
- char *cmdline = "<...>";
unsigned map;
- if (!pid)
- return "<idle>";
+ if (!pid) {
+ strcpy(comm, "<idle>");
+ return;
+ }
- if (pid > PID_MAX_DEFAULT)
- goto out;
+ if (pid > PID_MAX_DEFAULT) {
+ strcpy(comm, "<...>");
+ return;
+ }
+ __raw_spin_lock(&trace_cmdline_lock);
map = map_pid_to_cmdline[pid];
- if (map >= SAVED_CMDLINES)
- goto out;
-
- cmdline = saved_cmdlines[map];
+ if (map != NO_CMDLINE_MAP)
+ strcpy(comm, saved_cmdlines[map]);
+ else
+ strcpy(comm, "<...>");
- out:
- return cmdline;
+ __raw_spin_unlock(&trace_cmdline_lock);
}
void tracing_record_cmdline(struct task_struct *tsk)
{
- if (atomic_read(&trace_record_cmdline_disabled))
+ if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled ||
+ !tracing_is_on())
return;
trace_save_cmdline(tsk);
@@ -841,15 +861,25 @@ static void ftrace_trace_stack(struct trace_array *tr,
static void ftrace_trace_userstack(struct trace_array *tr,
unsigned long flags, int pc);
-void trace_buffer_unlock_commit(struct trace_array *tr,
- struct ring_buffer_event *event,
- unsigned long flags, int pc)
+static inline void __trace_buffer_unlock_commit(struct trace_array *tr,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc,
+ int wake)
{
ring_buffer_unlock_commit(tr->buffer, event);
ftrace_trace_stack(tr, flags, 6, pc);
ftrace_trace_userstack(tr, flags, pc);
- trace_wake_up();
+
+ if (wake)
+ trace_wake_up();
+}
+
+void trace_buffer_unlock_commit(struct trace_array *tr,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc)
+{
+ __trace_buffer_unlock_commit(tr, event, flags, pc, 1);
}
struct ring_buffer_event *
@@ -863,7 +893,13 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
unsigned long flags, int pc)
{
- return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
+ return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+}
+
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+ unsigned long flags, int pc)
+{
+ return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
}
void
@@ -889,7 +925,7 @@ trace_function(struct trace_array *tr,
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void __trace_graph_entry(struct trace_array *tr,
+static int __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned long flags,
int pc)
@@ -898,15 +934,17 @@ static void __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent_entry *entry;
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
- return;
+ return 0;
event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
sizeof(*entry), flags, pc);
if (!event)
- return;
+ return 0;
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
ring_buffer_unlock_commit(global_trace.buffer, event);
+
+ return 1;
}
static void __trace_graph_return(struct trace_array *tr,
@@ -1127,6 +1165,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
+ int ret;
int cpu;
int pc;
@@ -1142,15 +1181,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
pc = preempt_count();
- __trace_graph_entry(tr, trace, flags, pc);
+ ret = __trace_graph_entry(tr, trace, flags, pc);
+ } else {
+ ret = 0;
}
/* Only do the atomic if it is not already set */
if (!test_tsk_trace_graph(current))
set_tsk_trace_graph(current);
+
atomic_dec(&data->disabled);
local_irq_restore(flags);
- return 1;
+ return ret;
}
void trace_graph_return(struct ftrace_graph_ret *trace)
@@ -1182,7 +1224,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
* trace_vbprintk - write binary msg to tracing buffer
*
*/
-int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
static raw_spinlock_t trace_buf_lock =
(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
@@ -1224,7 +1266,6 @@ int trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args)
goto out_unlock;
entry = ring_buffer_event_data(event);
entry->ip = ip;
- entry->depth = depth;
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
@@ -1242,7 +1283,7 @@ out:
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
-int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
+int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
static char trace_buf[TRACE_BUF_SIZE];
@@ -1279,7 +1320,6 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
goto out_unlock;
entry = ring_buffer_event_data(event);
entry->ip = ip;
- entry->depth = depth;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
@@ -1682,38 +1722,6 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
return TRACE_TYPE_HANDLED;
}
-static enum print_line_t print_bprintk_msg_only(struct trace_iterator *iter)
-{
- struct trace_seq *s = &iter->seq;
- struct trace_entry *entry = iter->ent;
- struct bprint_entry *field;
- int ret;
-
- trace_assign_type(field, entry);
-
- ret = trace_seq_bprintf(s, field->fmt, field->buf);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
-{
- struct trace_seq *s = &iter->seq;
- struct trace_entry *entry = iter->ent;
- struct print_entry *field;
- int ret;
-
- trace_assign_type(field, entry);
-
- ret = trace_seq_printf(s, "%s", field->buf);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
@@ -1775,12 +1783,12 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
if (iter->ent->type == TRACE_BPRINT &&
trace_flags & TRACE_ITER_PRINTK &&
trace_flags & TRACE_ITER_PRINTK_MSGONLY)
- return print_bprintk_msg_only(iter);
+ return trace_print_bprintk_msg_only(iter);
if (iter->ent->type == TRACE_PRINT &&
trace_flags & TRACE_ITER_PRINTK &&
trace_flags & TRACE_ITER_PRINTK_MSGONLY)
- return print_printk_msg_only(iter);
+ return trace_print_printk_msg_only(iter);
if (trace_flags & TRACE_ITER_BIN)
return print_bin_fmt(iter);
@@ -1929,9 +1937,14 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
static int tracing_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
- struct trace_iterator *iter = m->private;
+ struct trace_iterator *iter;
int cpu;
+ if (!(file->f_mode & FMODE_READ))
+ return 0;
+
+ iter = m->private;
+
mutex_lock(&trace_types_lock);
for_each_tracing_cpu(cpu) {
if (iter->buffer_iter[cpu])
@@ -1957,12 +1970,24 @@ static int tracing_open(struct inode *inode, struct file *file)
struct trace_iterator *iter;
int ret = 0;
- iter = __tracing_open(inode, file);
- if (IS_ERR(iter))
- ret = PTR_ERR(iter);
- else if (trace_flags & TRACE_ITER_LATENCY_FMT)
- iter->iter_flags |= TRACE_FILE_LAT_FMT;
+ /* If this file was open for write, then erase contents */
+ if ((file->f_mode & FMODE_WRITE) &&
+ !(file->f_flags & O_APPEND)) {
+ long cpu = (long) inode->i_private;
+ if (cpu == TRACE_PIPE_ALL_CPU)
+ tracing_reset_online_cpus(&global_trace);
+ else
+ tracing_reset(&global_trace, cpu);
+ }
+
+ if (file->f_mode & FMODE_READ) {
+ iter = __tracing_open(inode, file);
+ if (IS_ERR(iter))
+ ret = PTR_ERR(iter);
+ else if (trace_flags & TRACE_ITER_LATENCY_FMT)
+ iter->iter_flags |= TRACE_FILE_LAT_FMT;
+ }
return ret;
}
@@ -2037,9 +2062,17 @@ static int show_traces_open(struct inode *inode, struct file *file)
return ret;
}
+static ssize_t
+tracing_write_stub(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
+ .write = tracing_write_stub,
.llseek = seq_lseek,
.release = tracing_release,
};
@@ -2240,6 +2273,34 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
return 0;
}
+static void set_tracer_flags(unsigned int mask, int enabled)
+{
+ /* do nothing if flag is already set */
+ if (!!(trace_flags & mask) == !!enabled)
+ return;
+
+ if (enabled)
+ trace_flags |= mask;
+ else
+ trace_flags &= ~mask;
+
+ if (mask == TRACE_ITER_GLOBAL_CLK) {
+ u64 (*func)(void);
+
+ if (enabled)
+ func = trace_clock_global;
+ else
+ func = trace_clock_local;
+
+ mutex_lock(&trace_types_lock);
+ ring_buffer_set_clock(global_trace.buffer, func);
+
+ if (max_tr.buffer)
+ ring_buffer_set_clock(max_tr.buffer, func);
+ mutex_unlock(&trace_types_lock);
+ }
+}
+
static ssize_t
tracing_trace_options_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
@@ -2267,10 +2328,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
int len = strlen(trace_options[i]);
if (strncmp(cmp, trace_options[i], len) == 0) {
- if (neg)
- trace_flags &= ~(1 << i);
- else
- trace_flags |= (1 << i);
+ set_tracer_flags(1 << i, !neg);
break;
}
}
@@ -2494,7 +2552,7 @@ static int tracing_set_tracer(const char *buf)
if (!ring_buffer_expanded) {
ret = tracing_resize_ring_buffer(trace_buf_size);
if (ret < 0)
- return ret;
+ goto out;
ret = 0;
}
@@ -3110,7 +3168,7 @@ static int mark_printk(const char *fmt, ...)
int ret;
va_list args;
va_start(args, fmt);
- ret = trace_vprintk(0, -1, fmt, args);
+ ret = trace_vprintk(0, fmt, args);
va_end(args);
return ret;
}
@@ -3478,6 +3536,9 @@ struct dentry *tracing_init_dentry(void)
if (d_tracer)
return d_tracer;
+ if (!debugfs_initialized())
+ return NULL;
+
d_tracer = debugfs_create_dir("tracing", NULL);
if (!d_tracer && !once) {
@@ -3539,7 +3600,7 @@ static void tracing_init_debugfs_percpu(long cpu)
pr_warning("Could not create debugfs 'trace_pipe' entry\n");
/* per cpu trace */
- entry = debugfs_create_file("trace", 0444, d_cpu,
+ entry = debugfs_create_file("trace", 0644, d_cpu,
(void *) cpu, &tracing_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace' entry\n");
@@ -3853,7 +3914,7 @@ static __init int tracer_init_debugfs(void)
if (!entry)
pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
- entry = debugfs_create_file("trace", 0444, d_tracer,
+ entry = debugfs_create_file("trace", 0644, d_tracer,
(void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
if (!entry)
pr_warning("Could not create debugfs 'trace' entry\n");
@@ -3983,11 +4044,12 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_init(s);
}
-void ftrace_dump(void)
+static void __ftrace_dump(bool disable_tracing)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
+ unsigned int old_userobj;
static int dump_ran;
unsigned long flags;
int cnt = 0, cpu;
@@ -3999,14 +4061,17 @@ void ftrace_dump(void)
dump_ran = 1;
- /* No turning back! */
tracing_off();
- ftrace_kill();
+
+ if (disable_tracing)
+ ftrace_kill();
for_each_tracing_cpu(cpu) {
atomic_inc(&global_trace.data[cpu]->disabled);
}
+ old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
+
/* don't look at user memory in panic mode */
trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
@@ -4051,10 +4116,26 @@ void ftrace_dump(void)
else
printk(KERN_TRACE "---------------------------------\n");
+ /* Re-enable tracing if requested */
+ if (!disable_tracing) {
+ trace_flags |= old_userobj;
+
+ for_each_tracing_cpu(cpu) {
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ }
+ tracing_on();
+ }
+
out:
spin_unlock_irqrestore(&ftrace_dump_lock, flags);
}
+/* By default: disable tracing after the dump */
+void ftrace_dump(void)
+{
+ __ftrace_dump(true);
+}
+
__init static int tracer_alloc_buffers(void)
{
struct trace_array_cpu *data;
@@ -4125,7 +4206,8 @@ __init static int tracer_alloc_buffers(void)
&trace_panic_notifier);
register_die_notifier(&trace_die_notifier);
- ret = 0;
+
+ return 0;
out_free_cpumask:
free_cpumask_var(tracing_reader_cpumask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e7fbc826f1e..9e15802cca9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -123,7 +123,6 @@ struct userstack_entry {
struct bprint_entry {
struct trace_entry ent;
unsigned long ip;
- int depth;
const char *fmt;
u32 buf[];
};
@@ -131,7 +130,6 @@ struct bprint_entry {
struct print_entry {
struct trace_entry ent;
unsigned long ip;
- int depth;
char buf[];
};
@@ -184,6 +182,12 @@ struct trace_power {
struct power_trace state_data;
};
+enum kmemtrace_type_id {
+ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
+ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
+ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
+};
+
struct kmemtrace_alloc_entry {
struct trace_entry ent;
enum kmemtrace_type_id type_id;
@@ -202,6 +206,19 @@ struct kmemtrace_free_entry {
const void *ptr;
};
+struct syscall_trace_enter {
+ struct trace_entry ent;
+ int nr;
+ unsigned long args[];
+};
+
+struct syscall_trace_exit {
+ struct trace_entry ent;
+ int nr;
+ unsigned long ret;
+};
+
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -315,6 +332,10 @@ extern void __ftrace_bad_type(void);
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
+ IF_ASSIGN(var, ent, struct syscall_trace_enter, \
+ TRACE_SYSCALL_ENTER); \
+ IF_ASSIGN(var, ent, struct syscall_trace_exit, \
+ TRACE_SYSCALL_EXIT); \
__ftrace_bad_type(); \
} while (0)
@@ -468,6 +489,8 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
unsigned long flags, int pc);
void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+ unsigned long flags, int pc);
struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_array_cpu *data);
@@ -547,7 +570,7 @@ struct tracer_switch_ops {
};
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
-extern char *trace_find_cmdline(int pid);
+extern void trace_find_cmdline(int pid, char comm[]);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
@@ -583,9 +606,9 @@ extern int trace_selftest_startup_hw_branches(struct tracer *trace,
extern void *head_page(struct trace_array_cpu *data);
extern long ns2usecs(cycle_t nsec);
extern int
-trace_vbprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
extern int
-trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+trace_vprintk(unsigned long ip, const char *fmt, va_list args);
extern unsigned long trace_flags;
@@ -669,6 +692,8 @@ enum trace_iterator_flags {
TRACE_ITER_PRINTK_MSGONLY = 0x10000,
TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */
TRACE_ITER_LATENCY_FMT = 0x40000,
+ TRACE_ITER_GLOBAL_CLK = 0x80000,
+ TRACE_ITER_SLEEP_TIME = 0x100000,
};
/*
@@ -761,22 +786,89 @@ enum {
TRACE_EVENT_TYPE_RAW = 2,
};
+struct ftrace_event_field {
+ struct list_head link;
+ char *name;
+ char *type;
+ int offset;
+ int size;
+};
+
struct ftrace_event_call {
- char *name;
- char *system;
- struct dentry *dir;
- int enabled;
- int (*regfunc)(void);
- void (*unregfunc)(void);
- int id;
- int (*raw_init)(void);
- int (*show_format)(struct trace_seq *s);
+ char *name;
+ char *system;
+ struct dentry *dir;
+ int enabled;
+ int (*regfunc)(void);
+ void (*unregfunc)(void);
+ int id;
+ int (*raw_init)(void);
+ int (*show_format)(struct trace_seq *s);
+ int (*define_fields)(void);
+ struct list_head fields;
+ struct filter_pred **preds;
+
+#ifdef CONFIG_EVENT_PROFILE
+ atomic_t profile_count;
+ int (*profile_enable)(struct ftrace_event_call *);
+ void (*profile_disable)(struct ftrace_event_call *);
+#endif
};
+struct event_subsystem {
+ struct list_head list;
+ const char *name;
+ struct dentry *entry;
+ struct filter_pred **preds;
+};
+
+#define events_for_each(event) \
+ for (event = __start_ftrace_events; \
+ (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+ event++)
+
+#define MAX_FILTER_PRED 8
+
+struct filter_pred;
+
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+
+struct filter_pred {
+ filter_pred_fn_t fn;
+ u64 val;
+ char *str_val;
+ int str_len;
+ char *field_name;
+ int offset;
+ int not;
+ int or;
+ int compound;
+ int clear;
+};
+
+int trace_define_field(struct ftrace_event_call *call, char *type,
+ char *name, int offset, int size);
+extern void filter_free_pred(struct filter_pred *pred);
+extern void filter_print_preds(struct filter_pred **preds,
+ struct trace_seq *s);
+extern int filter_parse(char **pbuf, struct filter_pred *pred);
+extern int filter_add_pred(struct ftrace_event_call *call,
+ struct filter_pred *pred);
+extern void filter_free_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern void filter_free_subsystem_preds(struct event_subsystem *system);
+extern int filter_add_subsystem_pred(struct event_subsystem *system,
+ struct filter_pred *pred);
+
void event_trace_printk(unsigned long ip, const char *fmt, ...);
extern struct ftrace_event_call __start_ftrace_events[];
extern struct ftrace_event_call __stop_ftrace_events[];
+#define for_each_event(event) \
+ for (event = __start_ftrace_events; \
+ (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+ event++)
+
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 05b176abfd3..b588fd81f7f 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -18,6 +18,7 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/ktime.h>
+#include <linux/trace_clock.h>
/*
* trace_clock_local(): the simplest and least coherent tracing clock.
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
new file mode 100644
index 00000000000..22cba997077
--- /dev/null
+++ b/kernel/trace/trace_event_profile.c
@@ -0,0 +1,31 @@
+/*
+ * trace event based perf counter profiling
+ *
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ */
+
+#include "trace.h"
+
+int ftrace_profile_enable(int event_id)
+{
+ struct ftrace_event_call *event;
+
+ for_each_event(event) {
+ if (event->id == event_id)
+ return event->profile_enable(event);
+ }
+
+ return -EINVAL;
+}
+
+void ftrace_profile_disable(int event_id)
+{
+ struct ftrace_event_call *event;
+
+ for_each_event(event) {
+ if (event->id == event_id)
+ return event->profile_disable(event);
+ }
+}
+
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 019915063fe..fd78bee71dd 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -105,7 +105,6 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, ip, ip)
- TRACE_FIELD(unsigned int, depth, depth)
TRACE_FIELD(char *, fmt, fmt)
TRACE_FIELD_ZERO_CHAR(buf)
),
@@ -115,7 +114,6 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, ip, ip)
- TRACE_FIELD(unsigned int, depth, depth)
TRACE_FIELD_ZERO_CHAR(buf)
),
TP_RAW_FMT("%08lx (%d) fmt:%p %s")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 238ea95a411..64ec4d278ff 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -19,10 +19,38 @@
static DEFINE_MUTEX(event_mutex);
-#define events_for_each(event) \
- for (event = __start_ftrace_events; \
- (unsigned long)event < (unsigned long)__stop_ftrace_events; \
- event++)
+int trace_define_field(struct ftrace_event_call *call, char *type,
+ char *name, int offset, int size)
+{
+ struct ftrace_event_field *field;
+
+ field = kzalloc(sizeof(*field), GFP_KERNEL);
+ if (!field)
+ goto err;
+
+ field->name = kstrdup(name, GFP_KERNEL);
+ if (!field->name)
+ goto err;
+
+ field->type = kstrdup(type, GFP_KERNEL);
+ if (!field->type)
+ goto err;
+
+ field->offset = offset;
+ field->size = size;
+ list_add(&field->link, &call->fields);
+
+ return 0;
+
+err:
+ if (field) {
+ kfree(field->name);
+ kfree(field->type);
+ }
+ kfree(field);
+
+ return -ENOMEM;
+}
static void ftrace_clear_events(void)
{
@@ -90,7 +118,7 @@ static int ftrace_set_clr_event(char *buf, int set)
}
mutex_lock(&event_mutex);
- events_for_each(call) {
+ for_each_event(call) {
if (!call->name || !call->regfunc)
continue;
@@ -348,7 +376,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
#undef FIELD
#define FIELD(type, name) \
- #type, #name, offsetof(typeof(field), name), sizeof(field.name)
+ #type, "common_" #name, offsetof(typeof(field), name), \
+ sizeof(field.name)
static int trace_write_header(struct trace_seq *s)
{
@@ -378,15 +407,15 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
char *buf;
int r;
+ if (*ppos)
+ return 0;
+
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
trace_seq_init(s);
- if (*ppos)
- return 0;
-
/* If any of the first writes fail, so will the show_format. */
trace_seq_printf(s, "name: %s\n", call->name);
@@ -412,6 +441,162 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
return r;
}
+static ssize_t
+event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+ struct ftrace_event_call *call = filp->private_data;
+ struct trace_seq *s;
+ int r;
+
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+ trace_seq_printf(s, "%d\n", call->id);
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos,
+ s->buffer, s->len);
+ kfree(s);
+ return r;
+}
+
+static ssize_t
+event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct ftrace_event_call *call = filp->private_data;
+ struct trace_seq *s;
+ int r;
+
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ filter_print_preds(call->preds, s);
+ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+ kfree(s);
+
+ return r;
+}
+
+static ssize_t
+event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct ftrace_event_call *call = filp->private_data;
+ char buf[64], *pbuf = buf;
+ struct filter_pred *pred;
+ int err;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+ if (!pred)
+ return -ENOMEM;
+
+ err = filter_parse(&pbuf, pred);
+ if (err < 0) {
+ filter_free_pred(pred);
+ return err;
+ }
+
+ if (pred->clear) {
+ filter_free_preds(call);
+ filter_free_pred(pred);
+ return cnt;
+ }
+
+ if (filter_add_pred(call, pred)) {
+ filter_free_pred(pred);
+ return -EINVAL;
+ }
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
+static ssize_t
+subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct event_subsystem *system = filp->private_data;
+ struct trace_seq *s;
+ int r;
+
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ filter_print_preds(system->preds, s);
+ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+ kfree(s);
+
+ return r;
+}
+
+static ssize_t
+subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct event_subsystem *system = filp->private_data;
+ char buf[64], *pbuf = buf;
+ struct filter_pred *pred;
+ int err;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+ if (!pred)
+ return -ENOMEM;
+
+ err = filter_parse(&pbuf, pred);
+ if (err < 0) {
+ filter_free_pred(pred);
+ return err;
+ }
+
+ if (pred->clear) {
+ filter_free_subsystem_preds(system);
+ filter_free_pred(pred);
+ return cnt;
+ }
+
+ if (filter_add_subsystem_pred(system, pred)) {
+ filter_free_subsystem_preds(system);
+ filter_free_pred(pred);
+ return -EINVAL;
+ }
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
static const struct seq_operations show_event_seq_ops = {
.start = t_start,
.next = t_next,
@@ -452,6 +637,23 @@ static const struct file_operations ftrace_event_format_fops = {
.read = event_format_read,
};
+static const struct file_operations ftrace_event_id_fops = {
+ .open = tracing_open_generic,
+ .read = event_id_read,
+};
+
+static const struct file_operations ftrace_event_filter_fops = {
+ .open = tracing_open_generic,
+ .read = event_filter_read,
+ .write = event_filter_write,
+};
+
+static const struct file_operations ftrace_subsystem_filter_fops = {
+ .open = tracing_open_generic,
+ .read = subsystem_filter_read,
+ .write = subsystem_filter_write,
+};
+
static struct dentry *event_trace_events_dir(void)
{
static struct dentry *d_tracer;
@@ -472,12 +674,6 @@ static struct dentry *event_trace_events_dir(void)
return d_events;
}
-struct event_subsystem {
- struct list_head list;
- const char *name;
- struct dentry *entry;
-};
-
static LIST_HEAD(event_subsystems);
static struct dentry *
@@ -510,6 +706,8 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
system->name = name;
list_add(&system->list, &event_subsystems);
+ system->preds = NULL;
+
return system->entry;
}
@@ -550,6 +748,28 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
"'%s/enable' entry\n", call->name);
}
+ if (call->id) {
+ entry = debugfs_create_file("id", 0444, call->dir, call,
+ &ftrace_event_id_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs '%s/id' entry\n",
+ call->name);
+ }
+
+ if (call->define_fields) {
+ ret = call->define_fields();
+ if (ret < 0) {
+ pr_warning("Could not initialize trace point"
+ " events/%s\n", call->name);
+ return ret;
+ }
+ entry = debugfs_create_file("filter", 0644, call->dir, call,
+ &ftrace_event_filter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'%s/filter' entry\n", call->name);
+ }
+
/* A trace may not want to export its format */
if (!call->show_format)
return 0;
@@ -592,7 +812,7 @@ static __init int event_trace_init(void)
if (!d_events)
return 0;
- events_for_each(call) {
+ for_each_event(call) {
/* The linker may leave blanks */
if (!call->name)
continue;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
new file mode 100644
index 00000000000..026be412f35
--- /dev/null
+++ b/kernel/trace/trace_events_filter.c
@@ -0,0 +1,427 @@
+/*
+ * trace_events_filter - generic event filtering
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+static int filter_pred_64(struct filter_pred *pred, void *event)
+{
+ u64 *addr = (u64 *)(event + pred->offset);
+ u64 val = (u64)pred->val;
+ int match;
+
+ match = (val == *addr) ^ pred->not;
+
+ return match;
+}
+
+static int filter_pred_32(struct filter_pred *pred, void *event)
+{
+ u32 *addr = (u32 *)(event + pred->offset);
+ u32 val = (u32)pred->val;
+ int match;
+
+ match = (val == *addr) ^ pred->not;
+
+ return match;
+}
+
+static int filter_pred_16(struct filter_pred *pred, void *event)
+{
+ u16 *addr = (u16 *)(event + pred->offset);
+ u16 val = (u16)pred->val;
+ int match;
+
+ match = (val == *addr) ^ pred->not;
+
+ return match;
+}
+
+static int filter_pred_8(struct filter_pred *pred, void *event)
+{
+ u8 *addr = (u8 *)(event + pred->offset);
+ u8 val = (u8)pred->val;
+ int match;
+
+ match = (val == *addr) ^ pred->not;
+
+ return match;
+}
+
+static int filter_pred_string(struct filter_pred *pred, void *event)
+{
+ char *addr = (char *)(event + pred->offset);
+ int cmp, match;
+
+ cmp = strncmp(addr, pred->str_val, pred->str_len);
+
+ match = (!cmp) ^ pred->not;
+
+ return match;
+}
+
+/* return 1 if event matches, 0 otherwise (discard) */
+int filter_match_preds(struct ftrace_event_call *call, void *rec)
+{
+ int i, matched, and_failed = 0;
+ struct filter_pred *pred;
+
+ for (i = 0; i < MAX_FILTER_PRED; i++) {
+ if (call->preds[i]) {
+ pred = call->preds[i];
+ if (and_failed && !pred->or)
+ continue;
+ matched = pred->fn(pred, rec);
+ if (!matched && !pred->or) {
+ and_failed = 1;
+ continue;
+ } else if (matched && pred->or)
+ return 1;
+ } else
+ break;
+ }
+
+ if (and_failed)
+ return 0;
+
+ return 1;
+}
+
+void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
+{
+ char *field_name;
+ struct filter_pred *pred;
+ int i;
+
+ if (!preds) {
+ trace_seq_printf(s, "none\n");
+ return;
+ }
+
+ for (i = 0; i < MAX_FILTER_PRED; i++) {
+ if (preds[i]) {
+ pred = preds[i];
+ field_name = pred->field_name;
+ if (i)
+ trace_seq_printf(s, pred->or ? "|| " : "&& ");
+ trace_seq_printf(s, "%s ", field_name);
+ trace_seq_printf(s, pred->not ? "!= " : "== ");
+ if (pred->str_val)
+ trace_seq_printf(s, "%s\n", pred->str_val);
+ else
+ trace_seq_printf(s, "%llu\n", pred->val);
+ } else
+ break;
+ }
+}
+
+static struct ftrace_event_field *
+find_event_field(struct ftrace_event_call *call, char *name)
+{
+ struct ftrace_event_field *field;
+
+ list_for_each_entry(field, &call->fields, link) {
+ if (!strcmp(field->name, name))
+ return field;
+ }
+
+ return NULL;
+}
+
+void filter_free_pred(struct filter_pred *pred)
+{
+ if (!pred)
+ return;
+
+ kfree(pred->field_name);
+ kfree(pred->str_val);
+ kfree(pred);
+}
+
+void filter_free_preds(struct ftrace_event_call *call)
+{
+ int i;
+
+ if (call->preds) {
+ for (i = 0; i < MAX_FILTER_PRED; i++)
+ filter_free_pred(call->preds[i]);
+ kfree(call->preds);
+ call->preds = NULL;
+ }
+}
+
+void filter_free_subsystem_preds(struct event_subsystem *system)
+{
+ struct ftrace_event_call *call = __start_ftrace_events;
+ int i;
+
+ if (system->preds) {
+ for (i = 0; i < MAX_FILTER_PRED; i++)
+ filter_free_pred(system->preds[i]);
+ kfree(system->preds);
+ system->preds = NULL;
+ }
+
+ events_for_each(call) {
+ if (!call->name || !call->regfunc)
+ continue;
+
+ if (!strcmp(call->system, system->name))
+ filter_free_preds(call);
+ }
+}
+
+static int __filter_add_pred(struct ftrace_event_call *call,
+ struct filter_pred *pred)
+{
+ int i;
+
+ if (call->preds && !pred->compound)
+ filter_free_preds(call);
+
+ if (!call->preds) {
+ call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+ GFP_KERNEL);
+ if (!call->preds)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < MAX_FILTER_PRED; i++) {
+ if (!call->preds[i]) {
+ call->preds[i] = pred;
+ return 0;
+ }
+ }
+
+ return -ENOMEM;
+}
+
+static int is_string_field(const char *type)
+{
+ if (strchr(type, '[') && strstr(type, "char"))
+ return 1;
+
+ return 0;
+}
+
+int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
+{
+ struct ftrace_event_field *field;
+
+ field = find_event_field(call, pred->field_name);
+ if (!field)
+ return -EINVAL;
+
+ pred->offset = field->offset;
+
+ if (is_string_field(field->type)) {
+ if (!pred->str_val)
+ return -EINVAL;
+ pred->fn = filter_pred_string;
+ pred->str_len = field->size;
+ return __filter_add_pred(call, pred);
+ } else {
+ if (pred->str_val)
+ return -EINVAL;
+ }
+
+ switch (field->size) {
+ case 8:
+ pred->fn = filter_pred_64;
+ break;
+ case 4:
+ pred->fn = filter_pred_32;
+ break;
+ case 2:
+ pred->fn = filter_pred_16;
+ break;
+ case 1:
+ pred->fn = filter_pred_8;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return __filter_add_pred(call, pred);
+}
+
+static struct filter_pred *copy_pred(struct filter_pred *pred)
+{
+ struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
+ if (!new_pred)
+ return NULL;
+
+ memcpy(new_pred, pred, sizeof(*pred));
+
+ if (pred->field_name) {
+ new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
+ if (!new_pred->field_name) {
+ kfree(new_pred);
+ return NULL;
+ }
+ }
+
+ if (pred->str_val) {
+ new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
+ if (!new_pred->str_val) {
+ filter_free_pred(new_pred);
+ return NULL;
+ }
+ }
+
+ return new_pred;
+}
+
+int filter_add_subsystem_pred(struct event_subsystem *system,
+ struct filter_pred *pred)
+{
+ struct ftrace_event_call *call = __start_ftrace_events;
+ struct filter_pred *event_pred;
+ int i;
+
+ if (system->preds && !pred->compound)
+ filter_free_subsystem_preds(system);
+
+ if (!system->preds) {
+ system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+ GFP_KERNEL);
+ if (!system->preds)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < MAX_FILTER_PRED; i++) {
+ if (!system->preds[i]) {
+ system->preds[i] = pred;
+ break;
+ }
+ }
+
+ if (i == MAX_FILTER_PRED)
+ return -EINVAL;
+
+ events_for_each(call) {
+ int err;
+
+ if (!call->name || !call->regfunc)
+ continue;
+
+ if (strcmp(call->system, system->name))
+ continue;
+
+ if (!find_event_field(call, pred->field_name))
+ continue;
+
+ event_pred = copy_pred(pred);
+ if (!event_pred)
+ goto oom;
+
+ err = filter_add_pred(call, event_pred);
+ if (err)
+ filter_free_pred(event_pred);
+ if (err == -ENOMEM)
+ goto oom;
+ }
+
+ return 0;
+
+oom:
+ system->preds[i] = NULL;
+ return -ENOMEM;
+}
+
+int filter_parse(char **pbuf, struct filter_pred *pred)
+{
+ char *tmp, *tok, *val_str = NULL;
+ int tok_n = 0;
+
+ /* field ==/!= number, or/and field ==/!= number, number */
+ while ((tok = strsep(pbuf, " \n"))) {
+ if (tok_n == 0) {
+ if (!strcmp(tok, "0")) {
+ pred->clear = 1;
+ return 0;
+ } else if (!strcmp(tok, "&&")) {
+ pred->or = 0;
+ pred->compound = 1;
+ } else if (!strcmp(tok, "||")) {
+ pred->or = 1;
+ pred->compound = 1;
+ } else
+ pred->field_name = tok;
+ tok_n = 1;
+ continue;
+ }
+ if (tok_n == 1) {
+ if (!pred->field_name)
+ pred->field_name = tok;
+ else if (!strcmp(tok, "!="))
+ pred->not = 1;
+ else if (!strcmp(tok, "=="))
+ pred->not = 0;
+ else {
+ pred->field_name = NULL;
+ return -EINVAL;
+ }
+ tok_n = 2;
+ continue;
+ }
+ if (tok_n == 2) {
+ if (pred->compound) {
+ if (!strcmp(tok, "!="))
+ pred->not = 1;
+ else if (!strcmp(tok, "=="))
+ pred->not = 0;
+ else {
+ pred->field_name = NULL;
+ return -EINVAL;
+ }
+ } else {
+ val_str = tok;
+ break; /* done */
+ }
+ tok_n = 3;
+ continue;
+ }
+ if (tok_n == 3) {
+ val_str = tok;
+ break; /* done */
+ }
+ }
+
+ pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
+ if (!pred->field_name)
+ return -ENOMEM;
+
+ pred->val = simple_strtoull(val_str, &tmp, 10);
+ if (tmp == val_str) {
+ pred->str_val = kstrdup(val_str, GFP_KERNEL);
+ if (!pred->str_val)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 5117c43f5c6..30743f7d411 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -129,3 +129,48 @@ ftrace_format_##call(struct trace_seq *s) \
}
#include <trace/trace_event_types.h>
+
+#undef __field
+#define __field(type, item) \
+ ret = trace_define_field(event_call, #type, #item, \
+ offsetof(typeof(field), item), \
+ sizeof(field.item)); \
+ if (ret) \
+ return ret;
+
+#undef __array
+#define __array(type, item, len) \
+ ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+ offsetof(typeof(field), item), \
+ sizeof(field.item)); \
+ if (ret) \
+ return ret;
+
+#define __common_field(type, item) \
+ ret = trace_define_field(event_call, #type, "common_" #item, \
+ offsetof(typeof(field.ent), item), \
+ sizeof(field.ent.item)); \
+ if (ret) \
+ return ret;
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
+int \
+ftrace_define_fields_##call(void) \
+{ \
+ struct ftrace_raw_##call field; \
+ struct ftrace_event_call *event_call = &event_##call; \
+ int ret; \
+ \
+ __common_field(unsigned char, type); \
+ __common_field(unsigned char, flags); \
+ __common_field(unsigned char, preempt_count); \
+ __common_field(int, pid); \
+ __common_field(int, tgid); \
+ \
+ tstruct; \
+ \
+ return ret; \
+}
+
+#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
index ae2e323df0c..9d2fa78cecc 100644
--- a/kernel/trace/trace_events_stage_3.h
+++ b/kernel/trace/trace_events_stage_3.h
@@ -109,6 +109,40 @@
#undef TP_FMT
#define TP_FMT(fmt, args...) fmt "\n", ##args
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args) \
+static void ftrace_profile_##call(proto) \
+{ \
+ extern void perf_tpcounter_event(int); \
+ perf_tpcounter_event(event_##call.id); \
+} \
+ \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
+{ \
+ int ret = 0; \
+ \
+ if (!atomic_inc_return(&call->profile_count)) \
+ ret = register_trace_##call(ftrace_profile_##call); \
+ \
+ return ret; \
+} \
+ \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
+{ \
+ if (atomic_add_negative(-1, &call->profile_count)) \
+ unregister_trace_##call(ftrace_profile_##call); \
+}
+
+#define _TRACE_PROFILE_INIT(call) \
+ .profile_count = ATOMIC_INIT(-1), \
+ .profile_enable = ftrace_profile_enable_##call, \
+ .profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
#define _TRACE_FORMAT(call, proto, args, fmt) \
static void ftrace_event_##call(proto) \
{ \
@@ -130,18 +164,33 @@ static void ftrace_unreg_event_##call(void) \
{ \
unregister_trace_##call(ftrace_event_##call); \
} \
-
+ \
+static struct ftrace_event_call event_##call; \
+ \
+static int ftrace_init_event_##call(void) \
+{ \
+ int id; \
+ \
+ id = register_ftrace_event(NULL); \
+ if (!id) \
+ return -ENODEV; \
+ event_##call.id = id; \
+ return 0; \
+}
#undef TRACE_FORMAT
#define TRACE_FORMAT(call, proto, args, fmt) \
_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.system = __stringify(TRACE_SYSTEM), \
+ .raw_init = ftrace_init_event_##call, \
.regfunc = ftrace_reg_event_##call, \
.unregfunc = ftrace_unreg_event_##call, \
+ _TRACE_PROFILE_INIT(call) \
}
#undef __entry
@@ -149,11 +198,13 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
#undef TRACE_EVENT
#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
\
static struct ftrace_event_call event_##call; \
\
static void ftrace_raw_event_##call(proto) \
{ \
+ struct ftrace_event_call *call = &event_##call; \
struct ring_buffer_event *event; \
struct ftrace_raw_##call *entry; \
unsigned long irq_flags; \
@@ -171,7 +222,11 @@ static void ftrace_raw_event_##call(proto) \
\
assign; \
\
- trace_current_buffer_unlock_commit(event, irq_flags, pc); \
+ if (call->preds && !filter_match_preds(call, entry)) \
+ ring_buffer_event_discard(event); \
+ \
+ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
+ \
} \
\
static int ftrace_raw_reg_event_##call(void) \
@@ -202,6 +257,7 @@ static int ftrace_raw_init_event_##call(void) \
if (!id) \
return -ENODEV; \
event_##call.id = id; \
+ INIT_LIST_HEAD(&event_##call.fields); \
return 0; \
} \
\
@@ -214,4 +270,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.regfunc = ftrace_raw_reg_event_##call, \
.unregfunc = ftrace_raw_unreg_event_##call, \
.show_format = ftrace_format_##call, \
+ .define_fields = ftrace_define_fields_##call, \
+ _TRACE_PROFILE_INIT(call) \
}
+
+#include <trace/trace_event_types.h>
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4c388607ed6..d28687e7b3a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,6 +14,11 @@
#include "trace.h"
#include "trace_output.h"
+struct fgraph_data {
+ pid_t last_pid;
+ int depth;
+};
+
#define TRACE_GRAPH_INDENT 2
/* Flag options */
@@ -52,9 +57,9 @@ static struct tracer_flags tracer_flags = {
/* Add a function return address to the trace stack on thread info.*/
int
-ftrace_push_return_trace(unsigned long ret, unsigned long long time,
- unsigned long func, int *depth)
+ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
{
+ unsigned long long calltime;
int index;
if (!current->ret_stack)
@@ -66,11 +71,13 @@ ftrace_push_return_trace(unsigned long ret, unsigned long long time,
return -EBUSY;
}
+ calltime = trace_clock_local();
+
index = ++current->curr_ret_stack;
barrier();
current->ret_stack[index].ret = ret;
current->ret_stack[index].func = func;
- current->ret_stack[index].calltime = time;
+ current->ret_stack[index].calltime = calltime;
*depth = index;
return 0;
@@ -190,15 +197,15 @@ print_graph_cpu(struct trace_seq *s, int cpu)
static enum print_line_t
print_graph_proc(struct trace_seq *s, pid_t pid)
{
- int i;
- int ret;
- int len;
- char comm[8];
- int spaces = 0;
+ char comm[TASK_COMM_LEN];
/* sign + log10(MAX_INT) + '\0' */
char pid_str[11];
+ int spaces = 0;
+ int ret;
+ int len;
+ int i;
- strncpy(comm, trace_find_cmdline(pid), 7);
+ trace_find_cmdline(pid, comm);
comm[7] = '\0';
sprintf(pid_str, "%d", pid);
@@ -231,16 +238,16 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
/* If the pid changed since the last trace, output this event */
static enum print_line_t
-verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
+verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
{
pid_t prev_pid;
pid_t *last_pid;
int ret;
- if (!last_pids_cpu)
+ if (!data)
return TRACE_TYPE_HANDLED;
- last_pid = per_cpu_ptr(last_pids_cpu, cpu);
+ last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
if (*last_pid == pid)
return TRACE_TYPE_HANDLED;
@@ -471,6 +478,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
struct ftrace_graph_ent_entry *entry,
struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
{
+ struct fgraph_data *data = iter->private;
struct ftrace_graph_ret *graph_ret;
struct ftrace_graph_ent *call;
unsigned long long duration;
@@ -481,6 +489,18 @@ print_graph_entry_leaf(struct trace_iterator *iter,
call = &entry->graph_ent;
duration = graph_ret->rettime - graph_ret->calltime;
+ if (data) {
+ int cpu = iter->cpu;
+ int *depth = &(per_cpu_ptr(data, cpu)->depth);
+
+ /*
+ * Comments display at + 1 to depth. Since
+ * this is a leaf function, keep the comments
+ * equal to this depth.
+ */
+ *depth = call->depth - 1;
+ }
+
/* Overhead */
ret = print_graph_overhead(duration, s);
if (!ret)
@@ -512,12 +532,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
}
static enum print_line_t
-print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
- struct trace_seq *s, pid_t pid, int cpu)
+print_graph_entry_nested(struct trace_iterator *iter,
+ struct ftrace_graph_ent_entry *entry,
+ struct trace_seq *s, int cpu)
{
- int i;
- int ret;
struct ftrace_graph_ent *call = &entry->graph_ent;
+ struct fgraph_data *data = iter->private;
+ int ret;
+ int i;
+
+ if (data) {
+ int cpu = iter->cpu;
+ int *depth = &(per_cpu_ptr(data, cpu)->depth);
+
+ *depth = call->depth;
+ }
/* No overhead */
ret = print_graph_overhead(-1, s);
@@ -554,24 +583,24 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
}
static enum print_line_t
-print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
- struct trace_iterator *iter)
+print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
+ int type, unsigned long addr)
{
- int ret;
- int cpu = iter->cpu;
- pid_t *last_entry = iter->private;
+ struct fgraph_data *data = iter->private;
struct trace_entry *ent = iter->ent;
- struct ftrace_graph_ent *call = &field->graph_ent;
- struct ftrace_graph_ret_entry *leaf_ret;
+ int cpu = iter->cpu;
+ int ret;
/* Pid */
- if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, data) == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
- /* Interrupt */
- ret = print_graph_irq(iter, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (type) {
+ /* Interrupt */
+ ret = print_graph_irq(iter, addr, type, cpu, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Absolute time */
if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
@@ -598,11 +627,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}
+ return 0;
+}
+
+static enum print_line_t
+print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
+ struct trace_iterator *iter)
+{
+ int cpu = iter->cpu;
+ struct ftrace_graph_ent *call = &field->graph_ent;
+ struct ftrace_graph_ret_entry *leaf_ret;
+
+ if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
+ return TRACE_TYPE_PARTIAL_LINE;
+
leaf_ret = get_return_for_leaf(iter, field);
if (leaf_ret)
return print_graph_entry_leaf(iter, field, leaf_ret, s);
else
- return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
+ return print_graph_entry_nested(iter, field, s, cpu);
}
@@ -610,40 +653,27 @@ static enum print_line_t
print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
struct trace_entry *ent, struct trace_iterator *iter)
{
- int i;
- int ret;
- int cpu = iter->cpu;
- pid_t *last_pid = iter->private, pid = ent->pid;
unsigned long long duration = trace->rettime - trace->calltime;
+ struct fgraph_data *data = iter->private;
+ pid_t pid = ent->pid;
+ int cpu = iter->cpu;
+ int ret;
+ int i;
- /* Pid */
- if (verif_pid(s, pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (data) {
+ int cpu = iter->cpu;
+ int *depth = &(per_cpu_ptr(data, cpu)->depth);
- /* Absolute time */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
- ret = print_graph_abs_time(iter->ts, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ /*
+ * Comments display at + 1 to depth. This is the
+ * return from a function, we now want the comments
+ * to display at the same level of the bracket.
+ */
+ *depth = trace->depth - 1;
}
- /* Cpu */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, cpu);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
-
- /* Proc */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
- ret = print_graph_proc(s, ent->pid);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ if (print_graph_prologue(iter, s, 0, 0))
+ return TRACE_TYPE_PARTIAL_LINE;
/* Overhead */
ret = print_graph_overhead(duration, s);
@@ -684,42 +714,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
}
static enum print_line_t
-print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
- struct trace_entry *ent, struct trace_iterator *iter)
+print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
+ struct trace_iterator *iter)
{
- int i;
+ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
+ struct fgraph_data *data = iter->private;
+ struct trace_event *event;
+ int depth = 0;
int ret;
- int cpu = iter->cpu;
- pid_t *last_pid = iter->private;
-
- /* Pid */
- if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Absolute time */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
- ret = print_graph_abs_time(iter->ts, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ int i;
- /* Cpu */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, cpu);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ if (data)
+ depth = per_cpu_ptr(data, iter->cpu)->depth;
- /* Proc */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
- ret = print_graph_proc(s, ent->pid);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ if (print_graph_prologue(iter, s, 0, 0))
+ return TRACE_TYPE_PARTIAL_LINE;
/* No overhead */
ret = print_graph_overhead(-1, s);
@@ -734,8 +743,8 @@ print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
}
/* Indentation */
- if (trace->depth > 0)
- for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
+ if (depth > 0)
+ for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -746,9 +755,26 @@ print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ switch (iter->ent->type) {
+ case TRACE_BPRINT:
+ ret = trace_print_bprintk_msg_only(iter);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
+ break;
+ case TRACE_PRINT:
+ ret = trace_print_printk_msg_only(iter);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
+ break;
+ default:
+ event = ftrace_find_event(ent->type);
+ if (!event)
+ return TRACE_TYPE_UNHANDLED;
+
+ ret = event->trace(iter, sym_flags);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
+ }
/* Strip ending newline */
if (s->buffer[s->len - 1] == '\n') {
@@ -767,8 +793,8 @@ print_graph_comment(struct bprint_entry *trace, struct trace_seq *s,
enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
- struct trace_seq *s = &iter->seq;
struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
switch (entry->type) {
case TRACE_GRAPH_ENT: {
@@ -781,14 +807,11 @@ print_graph_function(struct trace_iterator *iter)
trace_assign_type(field, entry);
return print_graph_return(&field->ret, s, entry, iter);
}
- case TRACE_BPRINT: {
- struct bprint_entry *field;
- trace_assign_type(field, entry);
- return print_graph_comment(field, s, entry, iter);
- }
default:
- return TRACE_TYPE_UNHANDLED;
+ return print_graph_comment(s, entry, iter);
}
+
+ return TRACE_TYPE_HANDLED;
}
static void print_graph_headers(struct seq_file *s)
@@ -820,19 +843,21 @@ static void print_graph_headers(struct seq_file *s)
static void graph_trace_open(struct trace_iterator *iter)
{
- /* pid on the last trace processed */
- pid_t *last_pid = alloc_percpu(pid_t);
+ /* pid and depth on the last trace processed */
+ struct fgraph_data *data = alloc_percpu(struct fgraph_data);
int cpu;
- if (!last_pid)
+ if (!data)
pr_warning("function graph tracer: not enough memory\n");
else
for_each_possible_cpu(cpu) {
- pid_t *pid = per_cpu_ptr(last_pid, cpu);
+ pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
+ int *depth = &(per_cpu_ptr(data, cpu)->depth);
*pid = -1;
+ *depth = 0;
}
- iter->private = last_pid;
+ iter->private = data;
}
static void graph_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f095916e477..8e37fcddd8b 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -359,5 +359,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
int mmio_trace_printk(const char *fmt, va_list args)
{
- return trace_vprintk(0, -1, fmt, args);
+ return trace_vprintk(0, fmt, args);
}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 9aa84bde23c..394f94417e2 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -91,6 +91,7 @@ struct tracer nop_trace __read_mostly =
.name = "nop",
.init = nop_trace_init,
.reset = nop_trace_reset,
+ .wait_pipe = poll_wait_pipe,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_nop,
#endif
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index ea9d3b410c7..d72b9a63b24 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -19,6 +19,38 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
static int next_event_type = __TRACE_LAST_TYPE + 1;
+enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct bprint_entry *field;
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_bprintf(s, field->fmt, field->buf);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ struct print_entry *field;
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%s", field->buf);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
/**
* trace_seq_printf - sequence printing of trace information
* @s: trace sequence descriptor
@@ -105,7 +137,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
return 1;
}
-int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
+int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
{
if (len > ((PAGE_SIZE - 1) - s->len))
return 0;
@@ -116,10 +148,10 @@ int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
return len;
}
-int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
+int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, size_t len)
{
unsigned char hex[HEX_CHARS];
- unsigned char *data = mem;
+ const unsigned char *data = mem;
int i, j;
#ifdef __BIG_ENDIAN
@@ -135,6 +167,19 @@ int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
return trace_seq_putmem(s, hex, j);
}
+void *trace_seq_reserve(struct trace_seq *s, size_t len)
+{
+ void *ret;
+
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return NULL;
+
+ ret = s->buffer + s->len;
+ s->len += len;
+
+ return ret;
+}
+
int trace_seq_path(struct trace_seq *s, struct path *path)
{
unsigned char *p;
@@ -309,9 +354,9 @@ static int
lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
{
int hardirq, softirq;
- char *comm;
+ char comm[TASK_COMM_LEN];
- comm = trace_find_cmdline(entry->pid);
+ trace_find_cmdline(entry->pid, comm);
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
@@ -346,10 +391,12 @@ int trace_print_context(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry = iter->ent;
- char *comm = trace_find_cmdline(entry->pid);
unsigned long long t = ns2usecs(iter->ts);
unsigned long usec_rem = do_div(t, USEC_PER_SEC);
unsigned long secs = (unsigned long)t;
+ char comm[TASK_COMM_LEN];
+
+ trace_find_cmdline(entry->pid, comm);
return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
comm, entry->pid, iter->cpu, secs, usec_rem);
@@ -372,7 +419,10 @@ int trace_print_lat_context(struct trace_iterator *iter)
rel_usecs = ns2usecs(next_ts - iter->ts);
if (verbose) {
- char *comm = trace_find_cmdline(entry->pid);
+ char comm[TASK_COMM_LEN];
+
+ trace_find_cmdline(entry->pid, comm);
+
ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
" %ld.%03ldms (+%ld.%03ldms): ", comm,
entry->pid, iter->cpu, entry->flags,
@@ -444,6 +494,11 @@ int register_ftrace_event(struct trace_event *event)
mutex_lock(&trace_event_mutex);
+ if (!event) {
+ ret = next_event_type++;
+ goto out;
+ }
+
if (!event->type)
event->type = next_event_type++;
else if (event->type > __TRACE_LAST_TYPE) {
@@ -577,14 +632,15 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
char *delim)
{
struct ctx_switch_entry *field;
- char *comm;
+ char comm[TASK_COMM_LEN];
int S, T;
+
trace_assign_type(field, iter->ent);
T = task_state_char(field->next_state);
S = task_state_char(field->prev_state);
- comm = trace_find_cmdline(field->next_pid);
+ trace_find_cmdline(field->next_pid, comm);
if (!trace_seq_printf(&iter->seq,
" %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
field->prev_pid,
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 3b90e6ade1a..e0bde39c2dd 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -15,6 +15,11 @@ struct trace_event {
trace_print_func binary;
};
+extern enum print_line_t
+trace_print_bprintk_msg_only(struct trace_iterator *iter);
+extern enum print_line_t
+trace_print_printk_msg_only(struct trace_iterator *iter);
+
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
extern int
@@ -24,24 +29,27 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
unsigned long sym_flags);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
size_t cnt);
-int trace_seq_puts(struct trace_seq *s, const char *str);
-int trace_seq_putc(struct trace_seq *s, unsigned char c);
-int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
-int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
-int trace_seq_path(struct trace_seq *s, struct path *path);
-int seq_print_userip_objs(const struct userstack_entry *entry,
- struct trace_seq *s, unsigned long sym_flags);
-int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
- unsigned long ip, unsigned long sym_flags);
-
-int trace_print_context(struct trace_iterator *iter);
-int trace_print_lat_context(struct trace_iterator *iter);
-
-struct trace_event *ftrace_find_event(int type);
-int register_ftrace_event(struct trace_event *event);
-int unregister_ftrace_event(struct trace_event *event);
-
-enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
+extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+ size_t len);
+extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
+extern int trace_seq_path(struct trace_seq *s, struct path *path);
+extern int seq_print_userip_objs(const struct userstack_entry *entry,
+ struct trace_seq *s, unsigned long sym_flags);
+extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags);
+
+extern int trace_print_context(struct trace_iterator *iter);
+extern int trace_print_lat_context(struct trace_iterator *iter);
+
+extern struct trace_event *ftrace_find_event(int type);
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
+ int flags);
#define MAX_MEMHEX_BYTES 8
#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 91ce672fb03..bae791ebcc5 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -122,12 +122,16 @@ fail_start:
static void start_power_trace(struct trace_array *tr)
{
trace_power_enabled = 1;
- tracing_power_register();
}
static void stop_power_trace(struct trace_array *tr)
{
trace_power_enabled = 0;
+}
+
+static void power_trace_reset(struct trace_array *tr)
+{
+ trace_power_enabled = 0;
unregister_trace_power_start(probe_power_start);
unregister_trace_power_end(probe_power_end);
unregister_trace_power_mark(probe_power_mark);
@@ -188,7 +192,7 @@ static struct tracer power_tracer __read_mostly =
.init = power_trace_init,
.start = start_power_trace,
.stop = stop_power_trace,
- .reset = stop_power_trace,
+ .reset = power_trace_reset,
.print_line = power_print_line,
};
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 486785214e3..eb81556107f 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -112,7 +112,7 @@ int __trace_bprintk(unsigned long ip, const char *fmt, ...)
return 0;
va_start(ap, fmt);
- ret = trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap);
+ ret = trace_vbprintk(ip, fmt, ap);
va_end(ap);
return ret;
}
@@ -126,7 +126,7 @@ int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap)
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
- return trace_vbprintk(ip, task_curr_ret_stack(current), fmt, ap);
+ return trace_vbprintk(ip, fmt, ap);
}
EXPORT_SYMBOL_GPL(__ftrace_vbprintk);
@@ -139,7 +139,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...)
return 0;
va_start(ap, fmt);
- ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
+ ret = trace_vprintk(ip, fmt, ap);
va_end(ap);
return ret;
}
@@ -150,7 +150,7 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
if (!(trace_flags & TRACE_ITER_PRINTK))
return 0;
- return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
+ return trace_vprintk(ip, fmt, ap);
}
EXPORT_SYMBOL_GPL(__ftrace_vprintk);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 77132c2cf3d..de35f200abd 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -18,6 +18,7 @@ static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
static int sched_ref;
static DEFINE_MUTEX(sched_register_mutex);
+static int sched_stopped;
static void
probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -28,7 +29,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
int cpu;
int pc;
- if (!sched_ref)
+ if (!sched_ref || sched_stopped)
return;
tracing_record_cmdline(prev);
@@ -193,6 +194,7 @@ static void stop_sched_trace(struct trace_array *tr)
static int sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
+ tracing_reset_online_cpus(tr);
tracing_start_sched_switch_record();
return 0;
}
@@ -205,13 +207,12 @@ static void sched_switch_trace_reset(struct trace_array *tr)
static void sched_switch_trace_start(struct trace_array *tr)
{
- tracing_reset_online_cpus(tr);
- tracing_start_sched_switch();
+ sched_stopped = 0;
}
static void sched_switch_trace_stop(struct trace_array *tr)
{
- tracing_stop_sched_switch();
+ sched_stopped = 1;
}
static struct tracer sched_switch_trace __read_mostly =
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index b9109126706..499d01c44cd 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -250,6 +250,28 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/* Maximum number of functions to trace before diagnosing a hang */
+#define GRAPH_MAX_FUNC_TEST 100000000
+
+static void __ftrace_dump(bool disable_tracing);
+static unsigned int graph_hang_thresh;
+
+/* Wrap the real function entry probe to avoid possible hanging */
+static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
+{
+ /* This is harmlessly racy, we want to approximately detect a hang */
+ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
+ ftrace_graph_stop();
+ printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
+ if (ftrace_dump_on_oops)
+ __ftrace_dump(false);
+ return 0;
+ }
+
+ return trace_graph_entry(trace);
+}
+
/*
* Pretty much the same than for the function tracer from which the selftest
* has been borrowed.
@@ -261,15 +283,29 @@ trace_selftest_startup_function_graph(struct tracer *trace,
int ret;
unsigned long count;
- ret = tracer_init(trace, tr);
+ /*
+ * Simulate the init() callback but we attach a watchdog callback
+ * to detect and recover from possible hangs
+ */
+ tracing_reset_online_cpus(tr);
+ ret = register_ftrace_graph(&trace_graph_return,
+ &trace_graph_entry_watchdog);
if (ret) {
warn_failed_init_tracer(trace, ret);
goto out;
}
+ tracing_start_cmdline_record();
/* Sleep for a 1/10 of a second */
msleep(100);
+ /* Have we just recovered from a hang? */
+ if (graph_hang_thresh > GRAPH_MAX_FUNC_TEST) {
+ tracing_selftest_disabled = true;
+ ret = -1;
+ goto out;
+ }
+
tracing_stop();
/* check the trace buffer */
@@ -317,6 +353,14 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
local_irq_disable();
udelay(100);
local_irq_enable();
+
+ /*
+ * Stop the tracer to avoid a warning subsequent
+ * to buffer flipping failure because tracing_stop()
+ * disables the tr and max buffers, making flipping impossible
+ * in case of parallels max irqs off latencies.
+ */
+ trace->stop(tr);
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
@@ -371,6 +415,14 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
preempt_disable();
udelay(100);
preempt_enable();
+
+ /*
+ * Stop the tracer to avoid a warning subsequent
+ * to buffer flipping failure because tracing_stop()
+ * disables the tr and max buffers, making flipping impossible
+ * in case of parallels max preempt off latencies.
+ */
+ trace->stop(tr);
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
@@ -416,7 +468,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
- goto out;
+ goto out_no_start;
}
/* reset the max latency */
@@ -430,31 +482,35 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
/* reverse the order of preempt vs irqs */
local_irq_enable();
+ /*
+ * Stop the tracer to avoid a warning subsequent
+ * to buffer flipping failure because tracing_stop()
+ * disables the tr and max buffers, making flipping impossible
+ * in case of parallels max irqs/preempt off latencies.
+ */
+ trace->stop(tr);
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
ret = trace_test_buffer(tr, NULL);
- if (ret) {
- tracing_start();
+ if (ret)
goto out;
- }
ret = trace_test_buffer(&max_tr, &count);
- if (ret) {
- tracing_start();
+ if (ret)
goto out;
- }
if (!ret && !count) {
printk(KERN_CONT ".. no entries found ..");
ret = -1;
- tracing_start();
goto out;
}
/* do the test by disabling interrupts first this time */
tracing_max_latency = 0;
tracing_start();
+ trace->start(tr);
+
preempt_disable();
local_irq_disable();
udelay(100);
@@ -462,6 +518,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
/* reverse the order of preempt vs irqs */
local_irq_enable();
+ trace->stop(tr);
/* stop the tracing. */
tracing_stop();
/* check both trace buffers */
@@ -477,9 +534,10 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
goto out;
}
- out:
- trace->reset(tr);
+out:
tracing_start();
+out_no_start:
+ trace->reset(tr);
tracing_max_latency = save_max;
return ret;
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 39310e3434e..acdebd771a9 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -75,7 +75,7 @@ static int stat_seq_init(struct tracer_stat_session *session)
{
struct trace_stat_list *iter_entry, *new_entry;
struct tracer_stat *ts = session->ts;
- void *prev_stat;
+ void *stat;
int ret = 0;
int i;
@@ -85,6 +85,10 @@ static int stat_seq_init(struct tracer_stat_session *session)
if (!ts->stat_cmp)
ts->stat_cmp = dummy_cmp;
+ stat = ts->stat_start();
+ if (!stat)
+ goto exit;
+
/*
* The first entry. Actually this is the second, but the first
* one (the stat_list head) is pointless.
@@ -99,14 +103,19 @@ static int stat_seq_init(struct tracer_stat_session *session)
list_add(&new_entry->list, &session->stat_list);
- new_entry->stat = ts->stat_start();
- prev_stat = new_entry->stat;
+ new_entry->stat = stat;
/*
* Iterate over the tracer stat entries and store them in a sorted
* list.
*/
for (i = 1; ; i++) {
+ stat = ts->stat_next(stat, i);
+
+ /* End of insertion */
+ if (!stat)
+ break;
+
new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
if (!new_entry) {
ret = -ENOMEM;
@@ -114,31 +123,23 @@ static int stat_seq_init(struct tracer_stat_session *session)
}
INIT_LIST_HEAD(&new_entry->list);
- new_entry->stat = ts->stat_next(prev_stat, i);
+ new_entry->stat = stat;
- /* End of insertion */
- if (!new_entry->stat)
- break;
-
- list_for_each_entry(iter_entry, &session->stat_list, list) {
+ list_for_each_entry_reverse(iter_entry, &session->stat_list,
+ list) {
/* Insertion with a descendent sorting */
- if (ts->stat_cmp(new_entry->stat,
- iter_entry->stat) > 0) {
-
- list_add_tail(&new_entry->list,
- &iter_entry->list);
- break;
+ if (ts->stat_cmp(iter_entry->stat,
+ new_entry->stat) >= 0) {
- /* The current smaller value */
- } else if (list_is_last(&iter_entry->list,
- &session->stat_list)) {
list_add(&new_entry->list, &iter_entry->list);
break;
}
}
- prev_stat = new_entry->stat;
+ /* The current larger value */
+ if (list_empty(&new_entry->list))
+ list_add(&new_entry->list, &session->stat_list);
}
exit:
mutex_unlock(&session->stat_mutex);
@@ -160,7 +161,7 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
/* If we are in the beginning of the file, print the headers */
if (!*pos && session->ts->stat_headers)
- session->ts->stat_headers(s);
+ return SEQ_START_TOKEN;
return seq_list_start(&session->stat_list, *pos);
}
@@ -169,6 +170,9 @@ static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
{
struct tracer_stat_session *session = s->private;
+ if (p == SEQ_START_TOKEN)
+ return seq_list_start(&session->stat_list, *pos);
+
return seq_list_next(p, &session->stat_list, pos);
}
@@ -183,6 +187,9 @@ static int stat_seq_show(struct seq_file *s, void *v)
struct tracer_stat_session *session = s->private;
struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+ if (v == SEQ_START_TOKEN)
+ return session->ts->stat_headers(s);
+
return session->ts->stat_show(s, l->stat);
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 66cf97449af..a2a3af29c94 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,21 +1,112 @@
-#include <linux/ftrace.h>
#include <linux/kernel.h>
-
+#include <linux/ftrace.h>
#include <asm/syscall.h>
#include "trace_output.h"
#include "trace.h"
-static atomic_t refcount;
+/* Keep a counter of the syscall tracing users */
+static int refcount;
+
+/* Prevent from races on thread flags toggling */
+static DEFINE_MUTEX(syscall_trace_lock);
+
+/* Option to display the parameters types */
+enum {
+ TRACE_SYSCALLS_OPT_TYPES = 0x1,
+};
+
+static struct tracer_opt syscalls_opts[] = {
+ { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) },
+ { }
+};
+
+static struct tracer_flags syscalls_flags = {
+ .val = 0, /* By default: no parameters types */
+ .opts = syscalls_opts
+};
+
+enum print_line_t
+print_syscall_enter(struct trace_iterator *iter, int flags)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *ent = iter->ent;
+ struct syscall_trace_enter *trace;
+ struct syscall_metadata *entry;
+ int i, ret, syscall;
+
+ trace_assign_type(trace, ent);
+
+ syscall = trace->nr;
+
+ entry = syscall_nr_to_meta(syscall);
+ if (!entry)
+ goto end;
+
+ ret = trace_seq_printf(s, "%s(", entry->name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ for (i = 0; i < entry->nb_args; i++) {
+ /* parameter types */
+ if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) {
+ ret = trace_seq_printf(s, "%s ", entry->types[i]);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ /* parameter values */
+ ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i],
+ trace->args[i],
+ i == entry->nb_args - 1 ? ")" : ",");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+end:
+ trace_seq_printf(s, "\n");
+ return TRACE_TYPE_HANDLED;
+}
+
+enum print_line_t
+print_syscall_exit(struct trace_iterator *iter, int flags)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *ent = iter->ent;
+ struct syscall_trace_exit *trace;
+ int syscall;
+ struct syscall_metadata *entry;
+ int ret;
+
+ trace_assign_type(trace, ent);
+
+ syscall = trace->nr;
+
+ entry = syscall_nr_to_meta(syscall);
+ if (!entry) {
+ trace_seq_printf(s, "\n");
+ return TRACE_TYPE_HANDLED;
+ }
+
+ ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
+ trace->ret);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
void start_ftrace_syscalls(void)
{
unsigned long flags;
struct task_struct *g, *t;
- if (atomic_inc_return(&refcount) != 1)
- goto out;
+ mutex_lock(&syscall_trace_lock);
+ /* Don't enable the flag on the tasks twice */
+ if (++refcount != 1)
+ goto unlock;
+
+ arch_init_ftrace_syscalls();
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, t) {
@@ -23,8 +114,9 @@ void start_ftrace_syscalls(void)
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
-out:
- atomic_dec(&refcount);
+
+unlock:
+ mutex_unlock(&syscall_trace_lock);
}
void stop_ftrace_syscalls(void)
@@ -32,8 +124,11 @@ void stop_ftrace_syscalls(void)
unsigned long flags;
struct task_struct *g, *t;
- if (atomic_dec_return(&refcount))
- goto out;
+ mutex_lock(&syscall_trace_lock);
+
+ /* There are perhaps still some users */
+ if (--refcount)
+ goto unlock;
read_lock_irqsave(&tasklist_lock, flags);
@@ -42,26 +137,64 @@ void stop_ftrace_syscalls(void)
} while_each_thread(g, t);
read_unlock_irqrestore(&tasklist_lock, flags);
-out:
- atomic_inc(&refcount);
+
+unlock:
+ mutex_unlock(&syscall_trace_lock);
}
void ftrace_syscall_enter(struct pt_regs *regs)
{
+ struct syscall_trace_enter *entry;
+ struct syscall_metadata *sys_data;
+ struct ring_buffer_event *event;
+ int size;
int syscall_nr;
syscall_nr = syscall_get_nr(current, regs);
- trace_printk("syscall %d enter\n", syscall_nr);
+ sys_data = syscall_nr_to_meta(syscall_nr);
+ if (!sys_data)
+ return;
+
+ size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+
+ event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size,
+ 0, 0);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ entry->nr = syscall_nr;
+ syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+
+ trace_current_buffer_unlock_commit(event, 0, 0);
+ trace_wake_up();
}
void ftrace_syscall_exit(struct pt_regs *regs)
{
+ struct syscall_trace_exit *entry;
+ struct syscall_metadata *sys_data;
+ struct ring_buffer_event *event;
int syscall_nr;
syscall_nr = syscall_get_nr(current, regs);
- trace_printk("syscall %d exit\n", syscall_nr);
+ sys_data = syscall_nr_to_meta(syscall_nr);
+ if (!sys_data)
+ return;
+
+ event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ entry->nr = syscall_nr;
+ entry->ret = syscall_get_return_value(current, regs);
+
+ trace_current_buffer_unlock_commit(event, 0, 0);
+ trace_wake_up();
}
static int init_syscall_tracer(struct trace_array *tr)
@@ -74,20 +207,24 @@ static int init_syscall_tracer(struct trace_array *tr)
static void reset_syscall_tracer(struct trace_array *tr)
{
stop_ftrace_syscalls();
+ tracing_reset_online_cpus(tr);
}
static struct trace_event syscall_enter_event = {
- .type = TRACE_SYSCALL_ENTER,
+ .type = TRACE_SYSCALL_ENTER,
+ .trace = print_syscall_enter,
};
static struct trace_event syscall_exit_event = {
- .type = TRACE_SYSCALL_EXIT,
+ .type = TRACE_SYSCALL_EXIT,
+ .trace = print_syscall_exit,
};
static struct tracer syscall_tracer __read_mostly = {
- .name = "syscall",
+ .name = "syscall",
.init = init_syscall_tracer,
- .reset = reset_syscall_tracer
+ .reset = reset_syscall_tracer,
+ .flags = &syscalls_flags,
};
__init int register_ftrace_syscalls(void)
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 9ab035b58cf..797201e4a13 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -196,6 +196,11 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
struct pid *pid;
struct task_struct *tsk;
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
+ seq_printf(s, "\n");
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+
pid = find_get_pid(cws->pid);
if (pid) {
tsk = get_pid_task(pid, PIDTYPE_PID);
@@ -208,18 +213,13 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
put_pid(pid);
}
- spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
- if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
- seq_printf(s, "\n");
- spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-
return 0;
}
static int workqueue_stat_headers(struct seq_file *s)
{
seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
- seq_printf(s, "# | | | |\n\n");
+ seq_printf(s, "# | | | |\n");
return 0;
}