aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/Kconfig7
-rw-r--r--kernel/trace/ftrace.c47
-rw-r--r--kernel/trace/ring_buffer.c114
-rw-r--r--kernel/trace/trace.c236
-rw-r--r--kernel/trace/trace.h55
-rw-r--r--kernel/trace/trace_boot.c36
-rw-r--r--kernel/trace/trace_functions.c6
-rw-r--r--kernel/trace/trace_irqsoff.c41
-rw-r--r--kernel/trace/trace_sched_switch.c50
-rw-r--r--kernel/trace/trace_sched_wakeup.c52
-rw-r--r--kernel/trace/trace_stack.c8
11 files changed, 485 insertions, 167 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e..fc4febc3334 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,13 @@ config NOP_TRACER
config HAVE_FUNCTION_TRACER
bool
+config HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ bool
+ help
+ This gets selected when the arch tests the function_trace_stop
+ variable at the mcount call site. Otherwise, this variable
+ is tested by the called function.
+
config HAVE_DYNAMIC_FTRACE
bool
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a39d24568c..896c71f0f4c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,9 @@
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
+/* Quick disabling of function tracer. */
+int function_trace_stop;
+
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@@ -63,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
@@ -88,8 +92,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
void clear_ftrace_function(void)
{
ftrace_trace_function = ftrace_stub;
+ __ftrace_trace_function = ftrace_stub;
}
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+/*
+ * For those archs that do not test ftrace_trace_stop in their
+ * mcount call site, we need to do it from C.
+ */
+static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
+{
+ if (function_trace_stop)
+ return;
+
+ __ftrace_trace_function(ip, parent_ip);
+}
+#endif
+
static int __register_ftrace_function(struct ftrace_ops *ops)
{
/* should not be called from interrupt context */
@@ -110,10 +129,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
* For one func, simply call it directly.
* For more than one func, call the chain.
*/
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
if (ops->next == &ftrace_list_end)
ftrace_trace_function = ops->func;
else
ftrace_trace_function = ftrace_list_func;
+#else
+ if (ops->next == &ftrace_list_end)
+ __ftrace_trace_function = ops->func;
+ else
+ __ftrace_trace_function = ftrace_list_func;
+ ftrace_trace_function = ftrace_test_stop_func;
+#endif
}
spin_unlock(&ftrace_lock);
@@ -526,7 +553,7 @@ static void ftrace_run_update_code(int command)
}
static ftrace_func_t saved_ftrace_func;
-static int ftrace_start;
+static int ftrace_start_up;
static DEFINE_MUTEX(ftrace_start_lock);
static void ftrace_startup(void)
@@ -537,8 +564,8 @@ static void ftrace_startup(void)
return;
mutex_lock(&ftrace_start_lock);
- ftrace_start++;
- if (ftrace_start == 1)
+ ftrace_start_up++;
+ if (ftrace_start_up == 1)
command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -562,8 +589,8 @@ static void ftrace_shutdown(void)
return;
mutex_lock(&ftrace_start_lock);
- ftrace_start--;
- if (!ftrace_start)
+ ftrace_start_up--;
+ if (!ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -589,8 +616,8 @@ static void ftrace_startup_sysctl(void)
mutex_lock(&ftrace_start_lock);
/* Force update next time */
saved_ftrace_func = NULL;
- /* ftrace_start is true if we want ftrace running */
- if (ftrace_start)
+ /* ftrace_start_up is true if we want ftrace running */
+ if (ftrace_start_up)
command |= FTRACE_ENABLE_CALLS;
ftrace_run_update_code(command);
@@ -605,8 +632,8 @@ static void ftrace_shutdown_sysctl(void)
return;
mutex_lock(&ftrace_start_lock);
- /* ftrace_start is true if ftrace is running */
- if (ftrace_start)
+ /* ftrace_start_up is true if ftrace is running */
+ if (ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
ftrace_run_update_code(command);
@@ -1186,7 +1213,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftrace_start_lock);
- if (iter->filtered && ftrace_start && ftrace_enabled)
+ if (iter->filtered && ftrace_start_up && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cedf4e26828..6781e9aab2c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -16,6 +16,8 @@
#include <linux/list.h>
#include <linux/fs.h>
+#include "trace.h"
+
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
@@ -152,7 +154,7 @@ static inline int test_time_stamp(u64 delta)
struct ring_buffer_per_cpu {
int cpu;
struct ring_buffer *buffer;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head pages;
struct buffer_page *head_page; /* read from head */
@@ -289,7 +291,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
cpu_buffer->cpu = cpu;
cpu_buffer->buffer = buffer;
- spin_lock_init(&cpu_buffer->lock);
+ cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
INIT_LIST_HEAD(&cpu_buffer->pages);
page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
@@ -852,7 +854,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (write > BUF_PAGE_SIZE) {
struct buffer_page *next_page = tail_page;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
rb_inc_page(cpu_buffer, &next_page);
@@ -928,7 +931,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
rb_set_commit_to_write(cpu_buffer);
}
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
/* fail and let the caller try again */
return ERR_PTR(-EAGAIN);
@@ -951,7 +955,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event;
out_unlock:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return NULL;
}
@@ -1022,8 +1027,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event;
u64 ts, delta;
int commit = 0;
+ int nr_loops = 0;
again:
+ /*
+ * We allow for interrupts to reenter here and do a trace.
+ * If one does, it will cause this original code to loop
+ * back here. Even with heavy interrupts happening, this
+ * should only happen a few times in a row. If this happens
+ * 1000 times in a row, there must be either an interrupt
+ * storm or we have something buggy.
+ * Bail!
+ */
+ if (unlikely(++nr_loops > 1000)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ return NULL;
+ }
+
ts = ring_buffer_time_stamp(cpu_buffer->cpu);
/*
@@ -1122,8 +1142,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
return NULL;
/* If we are tracing schedule, we don't want to recurse */
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
@@ -1154,10 +1173,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
return event;
out:
- if (resched)
- preempt_enable_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return NULL;
}
@@ -1199,12 +1215,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
/*
* Only the last preempt count needs to restore preemption.
*/
- if (preempt_count() == 1) {
- if (per_cpu(rb_need_resched, cpu))
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
- } else
+ if (preempt_count() == 1)
+ ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+ else
preempt_enable_no_resched_notrace();
return 0;
@@ -1237,8 +1250,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
if (atomic_read(&buffer->record_disabled))
return -EBUSY;
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
@@ -1264,10 +1276,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
ret = 0;
out:
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
return ret;
}
@@ -1532,10 +1541,24 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
struct buffer_page *reader = NULL;
unsigned long flags;
+ int nr_loops = 0;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
again:
+ /*
+ * This should normally only loop twice. But because the
+ * start of the reader inserts an empty page, it causes
+ * a case where we will loop three times. There should be no
+ * reason to loop four times (that I know of).
+ */
+ if (unlikely(++nr_loops > 3)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ reader = NULL;
+ goto out;
+ }
+
reader = cpu_buffer->reader_page;
/* If there's more to read, return this page */
@@ -1583,7 +1606,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
out:
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return reader;
}
@@ -1665,6 +1689,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
struct buffer_page *reader;
+ int nr_loops = 0;
if (!cpu_isset(cpu, buffer->cpumask))
return NULL;
@@ -1672,6 +1697,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
cpu_buffer = buffer->buffers[cpu];
again:
+ /*
+ * We repeat when a timestamp is encountered. It is possible
+ * to get multiple timestamps from an interrupt entering just
+ * as one timestamp is about to be written. The max times
+ * that this can happen is the number of nested interrupts we
+ * can have. Nesting 10 deep of interrupts is clearly
+ * an anomaly.
+ */
+ if (unlikely(++nr_loops > 10)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ return NULL;
+ }
+
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
return NULL;
@@ -1722,6 +1760,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
+ int nr_loops = 0;
if (ring_buffer_iter_empty(iter))
return NULL;
@@ -1730,6 +1769,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
buffer = cpu_buffer->buffer;
again:
+ /*
+ * We repeat when a timestamp is encountered. It is possible
+ * to get multiple timestamps from an interrupt entering just
+ * as one timestamp is about to be written. The max times
+ * that this can happen is the number of nested interrupts we
+ * can have. Nesting 10 deep of interrupts is clearly
+ * an anomaly.
+ */
+ if (unlikely(++nr_loops > 10)) {
+ RB_WARN_ON(cpu_buffer, 1);
+ return NULL;
+ }
+
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
@@ -1824,9 +1876,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
atomic_inc(&cpu_buffer->record_disabled);
synchronize_sched();
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
ring_buffer_iter_reset(iter);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
return iter;
}
@@ -1912,11 +1966,13 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (!cpu_isset(cpu, buffer->cpumask))
return;
- spin_lock_irqsave(&cpu_buffer->lock, flags);
+ local_irq_save(flags);
+ __raw_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
- spin_unlock_irqrestore(&cpu_buffer->lock, flags);
+ __raw_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
}
/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e4c40c868d6..d55ccfc8d67 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,6 +43,15 @@
unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
unsigned long __read_mostly tracing_thresh;
+
+/*
+ * Kill all tracing for good (never come back).
+ * It is initialized to 1 but will turn to zero if the initialization
+ * of the tracer is successful. But that is the only place that sets
+ * this back to zero.
+ */
+int tracing_disabled = 1;
+
static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
static inline void ftrace_disable_cpu(void)
@@ -62,8 +71,6 @@ static cpumask_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
-static int tracing_disabled = 1;
-
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
*
@@ -143,6 +150,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
/* tracer_enabled is used to toggle activation of a tracer */
static int tracer_enabled = 1;
+/**
+ * tracing_is_enabled - return tracer_enabled status
+ *
+ * This function is used by other tracers to know the status
+ * of the tracer_enabled flag. Tracers may use this function
+ * to know if it should enable their features when starting
+ * up. See irqsoff tracer for an example (start_irqsoff_tracer).
+ */
+int tracing_is_enabled(void)
+{
+ return tracer_enabled;
+}
+
/* function tracing enabled */
int ftrace_function_enabled;
@@ -244,6 +264,7 @@ static const char *trace_options[] = {
"stacktrace",
"sched-tree",
"ftrace_printk",
+ "ftrace_preempt",
NULL
};
@@ -612,6 +633,76 @@ static void trace_init_cmdlines(void)
cmdline_idx = 0;
}
+static int trace_stop_count;
+static DEFINE_SPINLOCK(tracing_start_lock);
+
+/**
+ * tracing_start - quick start of the tracer
+ *
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
+ */
+void tracing_start(void)
+{
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ if (tracing_disabled)
+ return;
+
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (--trace_stop_count)
+ goto out;
+
+ if (trace_stop_count < 0) {
+ /* Someone screwed up their debugging */
+ WARN_ON_ONCE(1);
+ trace_stop_count = 0;
+ goto out;
+ }
+
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_enable(buffer);
+
+ ftrace_start();
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+void tracing_stop(void)
+{
+ struct ring_buffer *buffer;
+ unsigned long flags;
+
+ ftrace_stop();
+ spin_lock_irqsave(&tracing_start_lock, flags);
+ if (trace_stop_count++)
+ goto out;
+
+ buffer = global_trace.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ buffer = max_tr.buffer;
+ if (buffer)
+ ring_buffer_record_disable(buffer);
+
+ out:
+ spin_unlock_irqrestore(&tracing_start_lock, flags);
+}
+
void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
@@ -891,7 +982,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
#ifdef CONFIG_FUNCTION_TRACER
static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
struct trace_array *tr = &global_trace;
struct trace_array_cpu *data;
@@ -904,8 +995,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -915,10 +1005,38 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
trace_function(tr, data, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
+}
+
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ raw_local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, data, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ raw_local_irq_restore(flags);
}
static struct ftrace_ops trace_ops __read_mostly =
@@ -929,9 +1047,14 @@ static struct ftrace_ops trace_ops __read_mostly =
void tracing_start_function_trace(void)
{
ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
register_ftrace_function(&trace_ops);
- if (tracer_enabled)
- ftrace_function_enabled = 1;
+ ftrace_function_enabled = 1;
}
void tracing_stop_function_trace(void)
@@ -1078,10 +1201,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
atomic_inc(&trace_record_cmdline_disabled);
- /* let the tracer grab locks here if needed */
- if (current_trace->start)
- current_trace->start(iter);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
@@ -1108,28 +1227,24 @@ static void *s_start(struct seq_file *m, loff_t *pos)
static void s_stop(struct seq_file *m, void *p)
{
- struct trace_iterator *iter = m->private;
-
atomic_dec(&trace_record_cmdline_disabled);
-
- /* let the tracer release locks here if needed */
- if (current_trace && current_trace == iter->trace && iter->trace->stop)
- iter->trace->stop(iter);
-
mutex_unlock(&trace_types_lock);
}
-#define KRETPROBE_MSG "[unknown/kretprobe'd]"
-
#ifdef CONFIG_KRETPROBES
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
{
- return addr == (unsigned long)kretprobe_trampoline;
+ static const char tramp_name[] = "kretprobe_trampoline";
+ int size = sizeof(tramp_name);
+
+ if (strncmp(tramp_name, name, size) == 0)
+ return "[unknown/kretprobe'd]";
+ return name;
}
#else
-static inline int kretprobed(unsigned long addr)
+static inline const char *kretprobed(const char *name)
{
- return 0;
+ return name;
}
#endif /* CONFIG_KRETPROBES */
@@ -1138,10 +1253,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
+ const char *name;
kallsyms_lookup(address, NULL, NULL, NULL, str);
- return trace_seq_printf(s, fmt, str);
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
#endif
return 1;
}
@@ -1152,9 +1270,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
+ const char *name;
sprint_symbol(str, address);
- return trace_seq_printf(s, fmt, str);
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
#endif
return 1;
}
@@ -1408,10 +1529,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
seq_print_ip_sym(s, field->ip, sym_flags);
trace_seq_puts(s, " (");
- if (kretprobed(field->parent_ip))
- trace_seq_puts(s, KRETPROBE_MSG);
- else
- seq_print_ip_sym(s, field->parent_ip, sym_flags);
+ seq_print_ip_sym(s, field->parent_ip, sym_flags);
trace_seq_puts(s, ")\n");
break;
}
@@ -1527,12 +1645,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
ret = trace_seq_printf(s, " <-");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- if (kretprobed(field->parent_ip))
- ret = trace_seq_puts(s, KRETPROBE_MSG);
- else
- ret = seq_print_ip_sym(s,
- field->parent_ip,
- sym_flags);
+ ret = seq_print_ip_sym(s,
+ field->parent_ip,
+ sym_flags);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
@@ -1783,7 +1898,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
return TRACE_TYPE_HANDLED;
SEQ_PUT_FIELD_RET(s, entry->pid);
- SEQ_PUT_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_FIELD_RET(s, entry->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
switch (entry->type) {
@@ -1945,10 +2060,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
m->private = iter;
/* stop the trace while dumping */
- if (iter->tr->ctrl) {
- tracer_enabled = 0;
- ftrace_function_enabled = 0;
- }
+ tracing_stop();
if (iter->trace && iter->trace->open)
iter->trace->open(iter);
@@ -1993,14 +2105,7 @@ int tracing_release(struct inode *inode, struct file *file)
iter->trace->close(iter);
/* reenable tracing if it was previously enabled */
- if (iter->tr->ctrl) {
- tracer_enabled = 1;
- /*
- * It is safe to enable function tracing even if it
- * isn't used
- */
- ftrace_function_enabled = 1;
- }
+ tracing_start();
mutex_unlock(&trace_types_lock);
seq_release(inode, file);
@@ -2338,11 +2443,10 @@ static ssize_t
tracing_ctrl_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct trace_array *tr = filp->private_data;
char buf[64];
int r;
- r = sprintf(buf, "%ld\n", tr->ctrl);
+ r = sprintf(buf, "%u\n", tracer_enabled);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -2370,16 +2474,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
val = !!val;
mutex_lock(&trace_types_lock);
- if (tr->ctrl ^ val) {
- if (val)
+ if (tracer_enabled ^ val) {
+ if (val) {
tracer_enabled = 1;
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ tracing_start();
+ } else {
tracer_enabled = 0;
-
- tr->ctrl = val;
-
- if (current_trace && current_trace->ctrl_update)
- current_trace->ctrl_update(tr);
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
}
mutex_unlock(&trace_types_lock);
@@ -3251,6 +3357,8 @@ __init static int tracer_alloc_buffers(void)
register_tracer(&nop_trace);
#ifdef CONFIG_BOOT_TRACER
+ /* We don't want to launch sched_switch tracer yet */
+ global_trace.ctrl = 0;
register_tracer(&boot_tracer);
current_trace = &boot_tracer;
current_trace->init(&global_trace);
@@ -3259,7 +3367,7 @@ __init static int tracer_alloc_buffers(void)
#endif
/* All seems OK, enable tracing */
- global_trace.ctrl = tracer_enabled;
+ global_trace.ctrl = 1;
tracing_disabled = 0;
atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad05270..3422489fad5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -49,6 +49,7 @@ struct ftrace_entry {
unsigned long parent_ip;
};
extern struct tracer boot_tracer;
+extern struct tracer sched_switch_trace; /* Used by the boot tracer */
/*
* Context switch trace entry - which task (and prio) we switched from/to:
@@ -236,11 +237,11 @@ struct tracer {
const char *name;
void (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
+ void (*start)(struct trace_array *tr);
+ void (*stop)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
- void (*start)(struct trace_iterator *iter);
- void (*stop)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
@@ -281,6 +282,7 @@ struct trace_iterator {
long idx;
};
+int tracing_is_enabled(void);
void trace_wake_up(void);
void tracing_reset(struct trace_array *tr, int cpu);
int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -415,8 +417,57 @@ enum trace_iterator_flags {
TRACE_ITER_STACKTRACE = 0x100,
TRACE_ITER_SCHED_TREE = 0x200,
TRACE_ITER_PRINTK = 0x400,
+ TRACE_ITER_PREEMPTONLY = 0x800,
};
extern struct tracer nop_trace;
+/**
+ * ftrace_preempt_disable - disable preemption scheduler safe
+ *
+ * When tracing can happen inside the scheduler, there exists
+ * cases that the tracing might happen before the need_resched
+ * flag is checked. If this happens and the tracer calls
+ * preempt_enable (after a disable), a schedule might take place
+ * causing an infinite recursion.
+ *
+ * To prevent this, we read the need_recshed flag before
+ * disabling preemption. When we want to enable preemption we
+ * check the flag, if it is set, then we call preempt_enable_no_resched.
+ * Otherwise, we call preempt_enable.
+ *
+ * The rational for doing the above is that if need resched is set
+ * and we have yet to reschedule, we are either in an atomic location
+ * (where we do not need to check for scheduling) or we are inside
+ * the scheduler and do not want to resched.
+ */
+static inline int ftrace_preempt_disable(void)
+{
+ int resched;
+
+ resched = need_resched();
+ preempt_disable_notrace();
+
+ return resched;
+}
+
+/**
+ * ftrace_preempt_enable - enable preemption scheduler safe
+ * @resched: the return value from ftrace_preempt_disable
+ *
+ * This is a scheduler safe way to enable preemption and not miss
+ * any preemption checks. The disabled saved the state of preemption.
+ * If resched is set, then we were either inside an atomic or
+ * are inside the scheduler (we would have already scheduled
+ * otherwise). In this case, we do not want to call normal
+ * preempt_enable, but preempt_enable_no_resched instead.
+ */
+static inline void ftrace_preempt_enable(int resched)
+{
+ if (resched)
+ preempt_enable_no_resched_notrace();
+ else
+ preempt_enable_notrace();
+}
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff..bd5046c9deb 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,23 +13,33 @@
#include "trace.h"
static struct trace_array *boot_trace;
-static int trace_boot_enabled;
+static bool pre_initcalls_finished;
-
-/* Should be started after do_pre_smp_initcalls() in init/main.c */
+/* Tells the boot tracer that the pre_smp_initcalls are finished.
+ * So we are ready .
+ * It doesn't enable sched events tracing however.
+ * You have to call enable_boot_trace to do so.
+ */
void start_boot_trace(void)
{
- trace_boot_enabled = 1;
+ pre_initcalls_finished = true;
}
-void stop_boot_trace(void)
+void enable_boot_trace(void)
{
- trace_boot_enabled = 0;
+ if (pre_initcalls_finished)
+ tracing_start_cmdline_record();
}
-void reset_boot_trace(struct trace_array *tr)
+void disable_boot_trace(void)
{
- stop_boot_trace();
+ if (pre_initcalls_finished)
+ tracing_stop_cmdline_record();
+}
+
+static void reset_boot_trace(struct trace_array *tr)
+{
+ sched_switch_trace.reset(tr);
}
static void boot_trace_init(struct trace_array *tr)
@@ -37,18 +47,18 @@ static void boot_trace_init(struct trace_array *tr)
int cpu;
boot_trace = tr;
- trace_boot_enabled = 0;
-
for_each_cpu_mask(cpu, cpu_possible_map)
tracing_reset(tr, cpu);
+
+ sched_switch_trace.init(tr);
}
static void boot_trace_ctrl_update(struct trace_array *tr)
{
if (tr->ctrl)
- start_boot_trace();
+ enable_boot_trace();
else
- stop_boot_trace();
+ disable_boot_trace();
}
static enum print_line_t initcall_print_line(struct trace_iterator *iter)
@@ -99,7 +109,7 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
unsigned long irq_flags;
struct trace_array *tr = boot_trace;
- if (!trace_boot_enabled)
+ if (!pre_initcalls_finished)
return;
/* Get its name now since this function could
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d..9f1b0de7128 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -62,11 +62,17 @@ static void function_trace_ctrl_update(struct trace_array *tr)
stop_function_trace(tr);
}
+static void function_trace_start(struct trace_array *tr)
+{
+ function_reset(tr);
+}
+
static struct tracer function_trace __read_mostly =
{
.name = "function",
.init = function_trace_init,
.reset = function_trace_reset,
+ .start = function_trace_start,
.ctrl_update = function_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e..a87a20fa3fc 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
}
#endif /* CONFIG_PREEMPT_TRACER */
+/*
+ * save_tracer_enabled is used to save the state of the tracer_enabled
+ * variable when we disable it when we open a trace output file.
+ */
+static int save_tracer_enabled;
+
static void start_irqsoff_tracer(struct trace_array *tr)
{
register_ftrace_function(&trace_ops);
- tracer_enabled = 1;
+ if (tracing_is_enabled()) {
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+ } else {
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+ }
}
static void stop_irqsoff_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
}
@@ -389,17 +402,29 @@ static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
stop_irqsoff_tracer(tr);
}
+static void irqsoff_tracer_start(struct trace_array *tr)
+{
+ irqsoff_tracer_reset(tr);
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+}
+
+static void irqsoff_tracer_stop(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+}
+
static void irqsoff_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
- if (iter->tr->ctrl)
- stop_irqsoff_tracer(iter->tr);
+ tracer_enabled = 0;
}
static void irqsoff_tracer_close(struct trace_iterator *iter)
{
- if (iter->tr->ctrl)
- start_irqsoff_tracer(iter->tr);
+ /* restart tracing */
+ tracer_enabled = save_tracer_enabled;
}
#ifdef CONFIG_IRQSOFF_TRACER
@@ -414,6 +439,8 @@ static struct tracer irqsoff_tracer __read_mostly =
.name = "irqsoff",
.init = irqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
@@ -440,6 +467,8 @@ static struct tracer preemptoff_tracer __read_mostly =
.name = "preemptoff",
.init = preemptoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
@@ -468,6 +497,8 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
.name = "preemptirqsoff",
.init = preemptirqsoff_tracer_init,
.reset = irqsoff_tracer_reset,
+ .start = irqsoff_tracer_start,
+ .stop = irqsoff_tracer_stop,
.open = irqsoff_tracer_open,
.close = irqsoff_tracer_close,
.ctrl_update = irqsoff_tracer_ctrl_update,
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a6..91c699be8c8 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
static struct trace_array *ctx_trace;
static int __read_mostly tracer_enabled;
-static atomic_t sched_ref;
+static int sched_ref;
+static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
int cpu;
int pc;
- if (!atomic_read(&sched_ref))
+ if (!sched_ref)
return;
tracing_record_cmdline(prev);
@@ -123,20 +124,22 @@ static void tracing_sched_unregister(void)
static void tracing_start_sched_switch(void)
{
- long ref;
-
- ref = atomic_inc_return(&sched_ref);
- if (ref == 1)
+ mutex_lock(&sched_register_mutex);
+ if (!(sched_ref++)) {
+ tracer_enabled = 1;
tracing_sched_register();
+ }
+ mutex_unlock(&sched_register_mutex);
}
static void tracing_stop_sched_switch(void)
{
- long ref;
-
- ref = atomic_dec_and_test(&sched_ref);
- if (ref)
+ mutex_lock(&sched_register_mutex);
+ if (!(--sched_ref)) {
tracing_sched_unregister();
+ tracer_enabled = 0;
+ }
+ mutex_unlock(&sched_register_mutex);
}
void tracing_start_cmdline_record(void)
@@ -153,12 +156,10 @@ static void start_sched_trace(struct trace_array *tr)
{
sched_switch_reset(tr);
tracing_start_cmdline_record();
- tracer_enabled = 1;
}
static void stop_sched_trace(struct trace_array *tr)
{
- tracer_enabled = 0;
tracing_stop_cmdline_record();
}
@@ -172,7 +173,7 @@ static void sched_switch_trace_init(struct trace_array *tr)
static void sched_switch_trace_reset(struct trace_array *tr)
{
- if (tr->ctrl)
+ if (tr->ctrl && sched_ref)
stop_sched_trace(tr);
}
@@ -185,11 +186,24 @@ static void sched_switch_trace_ctrl_update(struct trace_array *tr)
stop_sched_trace(tr);
}
-static struct tracer sched_switch_trace __read_mostly =
+static void sched_switch_trace_start(struct trace_array *tr)
+{
+ sched_switch_reset(tr);
+ tracing_start_sched_switch();
+}
+
+static void sched_switch_trace_stop(struct trace_array *tr)
+{
+ tracing_stop_sched_switch();
+}
+
+struct tracer sched_switch_trace __read_mostly =
{
.name = "sched_switch",
.init = sched_switch_trace_init,
.reset = sched_switch_trace_reset,
+ .start = sched_switch_trace_start,
+ .stop = sched_switch_trace_stop,
.ctrl_update = sched_switch_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sched_switch,
@@ -198,14 +212,6 @@ static struct tracer sched_switch_trace __read_mostly =
__init static int init_sched_switch_trace(void)
{
- int ret = 0;
-
- if (atomic_read(&sched_ref))
- ret = tracing_sched_register();
- if (ret) {
- pr_info("error registering scheduler trace\n");
- return ret;
- }
return register_tracer(&sched_switch_trace);
}
device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b56..240577bc8ba 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
return;
pc = preempt_count();
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&data->disabled);
- /*
- * To prevent recursion from the scheduler, if the
- * resched flag was set before we entered, then
- * don't reschedule.
- */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
static struct ftrace_ops trace_ops __read_mostly =
@@ -271,6 +262,12 @@ out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
+/*
+ * save_tracer_enabled is used to save the state of the tracer_enabled
+ * variable when we disable it when we open a trace output file.
+ */
+static int save_tracer_enabled;
+
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
register_ftrace_function(&trace_ops);
- tracer_enabled = 1;
+ if (tracing_is_enabled()) {
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+ } else {
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+ }
return;
fail_deprobe_wake_new:
@@ -321,6 +324,7 @@ fail_deprobe:
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
+ save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
unregister_trace_sched_switch(probe_wakeup_sched_switch);
unregister_trace_sched_wakeup_new(probe_wakeup);
@@ -352,18 +356,32 @@ static void wakeup_tracer_ctrl_update(struct trace_array *tr)
stop_wakeup_tracer(tr);
}
+static void wakeup_tracer_start(struct trace_array *tr)
+{
+ wakeup_reset(tr);
+ tracer_enabled = 1;
+ save_tracer_enabled = 1;
+}
+
+static void wakeup_tracer_stop(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+ save_tracer_enabled = 0;
+}
+
static void wakeup_tracer_open(struct trace_iterator *iter)
{
/* stop the trace while dumping */
- if (iter->tr->ctrl)
- stop_wakeup_tracer(iter->tr);
+ tracer_enabled = 0;
}
static void wakeup_tracer_close(struct trace_iterator *iter)
{
/* forget about any processes we were recording */
- if (iter->tr->ctrl)
- start_wakeup_tracer(iter->tr);
+ if (save_tracer_enabled) {
+ wakeup_reset(iter->tr);
+ tracer_enabled = 1;
+ }
}
static struct tracer wakeup_tracer __read_mostly =
@@ -371,6 +389,8 @@ static struct tracer wakeup_tracer __read_mostly =
.name = "wakeup",
.init = wakeup_tracer_init,
.reset = wakeup_tracer_reset,
+ .start = wakeup_tracer_start,
+ .stop = wakeup_tracer_stop,
.open = wakeup_tracer_open,
.close = wakeup_tracer_close,
.ctrl_update = wakeup_tracer_ctrl_update,
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index be682b62fe5..d39e8b7de6a 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
if (unlikely(!ftrace_enabled || stack_trace_disabled))
return;
- resched = need_resched();
- preempt_disable_notrace();
+ resched = ftrace_preempt_disable();
cpu = raw_smp_processor_id();
/* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
out:
per_cpu(trace_active, cpu)--;
/* prevent recursion in schedule */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ ftrace_preempt_enable(resched);
}
static struct ftrace_ops trace_ops __read_mostly =