diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace.h | 1 | ||||
-rw-r--r-- | kernel/trace/trace_hw_branches.c | 173 | ||||
-rw-r--r-- | kernel/trace/trace_workqueue.c | 64 |
3 files changed, 170 insertions, 68 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54b72781e92..b96037d970d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,7 +438,6 @@ void trace_function(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index df21c1e72b9..fff3545fc86 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -1,7 +1,8 @@ /* * h/w branch tracer for x86 based on bts * - * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com> + * Copyright (C) 2008-2009 Intel Corporation. + * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 * */ @@ -10,6 +11,9 @@ #include <linux/debugfs.h> #include <linux/ftrace.h> #include <linux/kallsyms.h> +#include <linux/mutex.h> +#include <linux/cpu.h> +#include <linux/smp.h> #include <asm/ds.h> @@ -19,13 +23,32 @@ #define SIZEOF_BTS (1 << 13) +/* The tracer mutex protects the below per-cpu tracer array. + It needs to be held to: + - start tracing on all cpus + - stop tracing on all cpus + - start tracing on a single hotplug cpu + - stop tracing on a single hotplug cpu + - read the trace from all cpus + - read the trace from a single cpu +*/ +static DEFINE_MUTEX(bts_tracer_mutex); static DEFINE_PER_CPU(struct bts_tracer *, tracer); static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) #define this_buffer per_cpu(buffer, smp_processor_id()) +static int __read_mostly trace_hw_branches_enabled; +static struct trace_array *hw_branch_trace __read_mostly; + +/* + * Start tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_mutex must be locked. + */ static void bts_trace_start_cpu(void *arg) { if (this_tracer) @@ -43,14 +66,20 @@ static void bts_trace_start_cpu(void *arg) static void bts_trace_start(struct trace_array *tr) { - int cpu; + mutex_lock(&bts_tracer_mutex); - tracing_reset_online_cpus(tr); + on_each_cpu(bts_trace_start_cpu, NULL, 1); + trace_hw_branches_enabled = 1; - for_each_cpu(cpu, cpu_possible_mask) - smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + mutex_unlock(&bts_tracer_mutex); } +/* + * Start tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_mutex must be locked. + */ static void bts_trace_stop_cpu(void *arg) { if (this_tracer) { @@ -61,26 +90,63 @@ static void bts_trace_stop_cpu(void *arg) static void bts_trace_stop(struct trace_array *tr) { - int cpu; + mutex_lock(&bts_tracer_mutex); + + trace_hw_branches_enabled = 0; + on_each_cpu(bts_trace_stop_cpu, NULL, 1); - for_each_cpu(cpu, cpu_possible_mask) + mutex_unlock(&bts_tracer_mutex); +} + +static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + mutex_lock(&bts_tracer_mutex); + + if (!trace_hw_branches_enabled) + goto out; + + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + break; + case CPU_DOWN_PREPARE: smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); + break; + } + + out: + mutex_unlock(&bts_tracer_mutex); + return NOTIFY_DONE; } +static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { + .notifier_call = bts_hotcpu_handler +}; + static int bts_trace_init(struct trace_array *tr) { + hw_branch_trace = tr; + + register_hotcpu_notifier(&bts_hotcpu_notifier); tracing_reset_online_cpus(tr); bts_trace_start(tr); return 0; } +static void bts_trace_reset(struct trace_array *tr) +{ + bts_trace_stop(tr); + unregister_hotcpu_notifier(&bts_hotcpu_notifier); +} + static void bts_trace_print_header(struct seq_file *m) { - seq_puts(m, - "# CPU# FROM TO FUNCTION\n"); - seq_puts(m, - "# | | | |\n"); + seq_puts(m, "# CPU# TO <- FROM\n"); } static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) @@ -88,15 +154,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) struct trace_entry *entry = iter->ent; struct trace_seq *seq = &iter->seq; struct hw_branch_entry *it; + unsigned long symflags = TRACE_ITER_SYM_OFFSET; trace_assign_type(it, entry); if (entry->type == TRACE_HW_BRANCHES) { if (trace_seq_printf(seq, "%4d ", entry->cpu) && - trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", - it->from, it->to) && - (!it->from || - seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) && + seq_print_ip_sym(seq, it->to, symflags) && + trace_seq_printf(seq, "\t <- ") && + seq_print_ip_sym(seq, it->from, symflags) && trace_seq_printf(seq, "\n")) return TRACE_TYPE_HANDLED; return TRACE_TYPE_PARTIAL_LINE;; @@ -104,26 +170,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) return TRACE_TYPE_UNHANDLED; } -void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) +void trace_hw_branch(u64 from, u64 to) { + struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; - unsigned long irq; + unsigned long irq1, irq2; + int cpu; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); - if (!event) + if (unlikely(!tr)) return; + + if (unlikely(!trace_hw_branches_enabled)) + return; + + local_irq_save(irq1); + cpu = raw_smp_processor_id(); + if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) + goto out; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq2); + if (!event) + goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, from); entry->ent.type = TRACE_HW_BRANCHES; - entry->ent.cpu = smp_processor_id(); + entry->ent.cpu = cpu; entry->from = from; entry->to = to; - ring_buffer_unlock_commit(tr->buffer, event, irq); + ring_buffer_unlock_commit(tr->buffer, event, irq2); + + out: + atomic_dec(&tr->data[cpu]->disabled); + local_irq_restore(irq1); } -static void trace_bts_at(struct trace_array *tr, - const struct bts_trace *trace, void *at) +static void trace_bts_at(const struct bts_trace *trace, void *at) { struct bts_struct bts; int err = 0; @@ -138,18 +220,29 @@ static void trace_bts_at(struct trace_array *tr, switch (bts.qualifier) { case BTS_BRANCH: - trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); + trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); break; } } +/* + * Collect the trace on the current cpu and write it into the ftrace buffer. + * + * pre: bts_tracer_mutex must be locked + */ static void trace_bts_cpu(void *arg) { struct trace_array *tr = (struct trace_array *) arg; const struct bts_trace *trace; unsigned char *at; - if (!this_tracer) + if (unlikely(!tr)) + return; + + if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) + return; + + if (unlikely(!this_tracer)) return; ds_suspend_bts(this_tracer); @@ -159,11 +252,11 @@ static void trace_bts_cpu(void *arg) for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) - trace_bts_at(tr, trace, at); + trace_bts_at(trace, at); out: ds_resume_bts(this_tracer); @@ -171,22 +264,38 @@ out: static void trace_bts_prepare(struct trace_iterator *iter) { - int cpu; + mutex_lock(&bts_tracer_mutex); + + on_each_cpu(trace_bts_cpu, iter->tr, 1); + + mutex_unlock(&bts_tracer_mutex); +} + +static void trace_bts_close(struct trace_iterator *iter) +{ + tracing_reset_online_cpus(iter->tr); +} + +void trace_hw_branch_oops(void) +{ + mutex_lock(&bts_tracer_mutex); + + trace_bts_cpu(hw_branch_trace); - for_each_cpu(cpu, cpu_possible_mask) - smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1); + mutex_unlock(&bts_tracer_mutex); } struct tracer bts_tracer __read_mostly = { .name = "hw-branch-tracer", .init = bts_trace_init, - .reset = bts_trace_stop, + .reset = bts_trace_reset, .print_header = bts_trace_print_header, .print_line = bts_trace_print_line, .start = bts_trace_start, .stop = bts_trace_stop, - .open = trace_bts_prepare + .open = trace_bts_prepare, + .close = trace_bts_close }; __init static int init_bts_trace(void) diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index f8118d39ca9..4664990fe9c 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -8,6 +8,7 @@ #include <trace/workqueue.h> #include <linux/list.h> +#include <linux/percpu.h> #include "trace_stat.h" #include "trace.h" @@ -37,7 +38,8 @@ struct workqueue_global_stats { /* Don't need a global lock because allocated before the workqueues, and * never freed. */ -static struct workqueue_global_stats *all_workqueue_stat; +static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); +#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) /* Insertion of a work */ static void @@ -48,8 +50,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread, struct cpu_workqueue_stats *node, *next; unsigned long flags; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list, + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { atomic_inc(&node->inserted); @@ -58,7 +60,7 @@ probe_workqueue_insertion(struct task_struct *wq_thread, } pr_debug("trace_workqueue: entry not found\n"); found: - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } /* Execution of a work */ @@ -70,8 +72,8 @@ probe_workqueue_execution(struct task_struct *wq_thread, struct cpu_workqueue_stats *node, *next; unsigned long flags; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list, + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { node->executed++; @@ -80,7 +82,7 @@ probe_workqueue_execution(struct task_struct *wq_thread, } pr_debug("trace_workqueue: entry not found\n"); found: - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } /* Creation of a cpu workqueue thread */ @@ -104,11 +106,11 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) cws->pid = wq_thread->pid; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - if (list_empty(&all_workqueue_stat[cpu].list)) + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (list_empty(&workqueue_cpu_stat(cpu)->list)) cws->first_entry = true; - list_add_tail(&cws->list, &all_workqueue_stat[cpu].list); - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } /* Destruction of a cpu workqueue thread */ @@ -119,8 +121,8 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread) struct cpu_workqueue_stats *node, *next; unsigned long flags; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list, + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, list) { if (node->pid == wq_thread->pid) { list_del(&node->list); @@ -131,7 +133,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread) pr_debug("trace_workqueue: don't find workqueue to destroy\n"); found: - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); } @@ -141,13 +143,13 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu) struct cpu_workqueue_stats *ret = NULL; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); - if (!list_empty(&all_workqueue_stat[cpu].list)) - ret = list_entry(all_workqueue_stat[cpu].list.next, + if (!list_empty(&workqueue_cpu_stat(cpu)->list)) + ret = list_entry(workqueue_cpu_stat(cpu)->list.next, struct cpu_workqueue_stats, list); - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); return ret; } @@ -172,9 +174,9 @@ static void *workqueue_stat_next(void *prev, int idx) unsigned long flags; void *ret = NULL; - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) { - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); for (++cpu ; cpu < num_possible_cpus(); cpu++) { ret = workqueue_stat_start_cpu(cpu); if (ret) @@ -182,7 +184,7 @@ static void *workqueue_stat_next(void *prev, int idx) } return NULL; } - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, list); @@ -199,10 +201,10 @@ static int workqueue_stat_show(struct seq_file *s, void *p) cws->executed, trace_find_cmdline(cws->pid)); - spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags); - if (&cws->list == all_workqueue_stat[cpu].list.next) + spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); + if (&cws->list == workqueue_cpu_stat(cpu)->list.next) seq_printf(s, "\n"); - spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags); + spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); return 0; } @@ -258,17 +260,9 @@ int __init trace_workqueue_early_init(void) if (ret) goto no_creation; - all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats) - * num_possible_cpus(), GFP_KERNEL); - - if (!all_workqueue_stat) { - pr_warning("trace_workqueue: not enough memory\n"); - goto no_creation; - } - for_each_possible_cpu(cpu) { - spin_lock_init(&all_workqueue_stat[cpu].lock); - INIT_LIST_HEAD(&all_workqueue_stat[cpu].list); + spin_lock_init(&workqueue_cpu_stat(cpu)->lock); + INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); } return 0; |