aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederic Weisbecker <fweisbec@gmail.com>2009-09-12 08:06:14 +0200
committerIngo Molnar <mingo@elte.hu>2009-09-13 10:22:43 +0200
commitcdce9d738b91e1ec686e4ef6ca46d46e2891e208 (patch)
tree1fd64c4bef114ce44a3c96ad7183fa200bff3d12
parent419ab0d6a959f41ec7fde807fe311aaafb05c3be (diff)
perf sched: Add sched latency profiling
Add the -l --latency option that reports statistics about the scheduler latencies. For now, the latencies are measured in the following sequence scope: - task A is sleeping (D or S state) - task B wakes up A ^ | | latency timeframe | | v - task A is scheduled in Start by recording every scheduler events: perf record -e sched:* and then fetch the results: perf sched -l Tasks count total avg max migration/0 2 39849 19924 28826 ksoftirqd/0 7 756383 108054 373014 migration/1 5 45391 9078 10452 ksoftirqd/1 2 399055 199527 359130 events/0 8 4780110 597513 4500250 events/1 9 6353057 705895 2986012 kblockd/0 42 37805097 900121 5077684 The snapshot are in nanoseconds. - Count: number of snapshots taken for the given task - Total: total latencies in nanosec - Avg : average of latency between wake up and sched in - Max : max snapshot latency Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--tools/perf/builtin-sched.c296
1 files changed, 285 insertions, 11 deletions
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 756fe62eb04..4f9e943181a 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -31,6 +31,7 @@ static struct perf_header *header;
static u64 sample_type;
static int replay_mode;
+static int lat_mode;
/*
@@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = {
.fork_event = replay_fork_event,
};
+#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+
+enum thread_state {
+ THREAD_SLEEPING,
+ THREAD_WAKED_UP,
+ THREAD_SCHED_IN,
+ THREAD_IGNORE
+};
+
+struct lat_snapshot {
+ struct list_head list;
+ enum thread_state state;
+ u64 wake_up_time;
+ u64 sched_in_time;
+};
+
+struct thread_latency {
+ struct list_head snapshot_list;
+ struct thread *thread;
+ struct rb_node node;
+};
+
+static struct rb_root lat_snapshot_root;
+
+static struct thread_latency *
+thread_latency_search(struct rb_root *root, struct thread *thread)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct thread_latency *lat;
+
+ lat = container_of(node, struct thread_latency, node);
+ if (thread->pid < lat->thread->pid)
+ node = node->rb_left;
+ else if (thread->pid > lat->thread->pid)
+ node = node->rb_right;
+ else {
+ return lat;
+ }
+ }
+ return NULL;
+}
+
+static void
+__thread_latency_insert(struct rb_root *root, struct thread_latency *data)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ while (*new) {
+ struct thread_latency *this;
+
+ this = container_of(*new, struct thread_latency, node);
+ parent = *new;
+ if (data->thread->pid < this->thread->pid)
+ new = &((*new)->rb_left);
+ else if (data->thread->pid > this->thread->pid)
+ new = &((*new)->rb_right);
+ else
+ die("Double thread insertion\n");
+ }
+
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void thread_latency_insert(struct thread *thread)
+{
+ struct thread_latency *lat;
+ lat = calloc(sizeof(*lat), 1);
+ if (!lat)
+ die("No memory");
+
+ lat->thread = thread;
+ INIT_LIST_HEAD(&lat->snapshot_list);
+ __thread_latency_insert(&lat_snapshot_root, lat);
+}
+
+static void
+latency_fork_event(struct trace_fork_event *fork_event __used,
+ struct event *event __used,
+ int cpu __used,
+ u64 timestamp __used,
+ struct thread *thread __used)
+{
+ /* should insert the newcomer */
+}
+
+static char sched_out_state(struct trace_switch_event *switch_event)
+{
+ const char *str = TASK_STATE_TO_CHAR_STR;
+
+ return str[switch_event->prev_state];
+}
+
+static void
+lat_sched_out(struct thread_latency *lat,
+ struct trace_switch_event *switch_event)
+{
+ struct lat_snapshot *snapshot;
+
+ if (sched_out_state(switch_event) == 'R')
+ return;
+
+ snapshot = calloc(sizeof(*snapshot), 1);
+ if (!snapshot)
+ die("Non memory");
+
+ list_add_tail(&snapshot->list, &lat->snapshot_list);
+}
+
+static void
+lat_sched_in(struct thread_latency *lat, u64 timestamp)
+{
+ struct lat_snapshot *snapshot;
+
+ if (list_empty(&lat->snapshot_list))
+ return;
+
+ snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
+ list);
+
+ if (snapshot->state != THREAD_WAKED_UP)
+ return;
+
+ if (timestamp < snapshot->wake_up_time) {
+ snapshot->state = THREAD_IGNORE;
+ return;
+ }
+
+ snapshot->state = THREAD_SCHED_IN;
+ snapshot->sched_in_time = timestamp;
+}
+
+
+static void
+latency_switch_event(struct trace_switch_event *switch_event,
+ struct event *event __used,
+ int cpu __used,
+ u64 timestamp,
+ struct thread *thread __used)
+{
+ struct thread_latency *out_lat, *in_lat;
+ struct thread *sched_out, *sched_in;
+
+ sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
+ sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+
+ in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
+ if (!in_lat) {
+ thread_latency_insert(sched_in);
+ in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
+ if (!in_lat)
+ die("Internal latency tree error");
+ }
+
+ out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
+ if (!out_lat) {
+ thread_latency_insert(sched_out);
+ out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
+ if (!out_lat)
+ die("Internal latency tree error");
+ }
+
+ lat_sched_in(in_lat, timestamp);
+ lat_sched_out(out_lat, switch_event);
+}
+
+static void
+latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
+ struct event *event __used,
+ int cpu __used,
+ u64 timestamp,
+ struct thread *thread __used)
+{
+ struct thread_latency *lat;
+ struct lat_snapshot *snapshot;
+ struct thread *wakee;
+
+ /* Note for later, it may be interesting to observe the failing cases */
+ if (!wakeup_event->success)
+ return;
+
+ wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
+ lat = thread_latency_search(&lat_snapshot_root, wakee);
+ if (!lat) {
+ thread_latency_insert(wakee);
+ return;
+ }
+
+ if (list_empty(&lat->snapshot_list))
+ return;
+
+ snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
+ list);
+
+ if (snapshot->state != THREAD_SLEEPING)
+ return;
+
+ snapshot->state = THREAD_WAKED_UP;
+ snapshot->wake_up_time = timestamp;
+}
+
+static struct trace_sched_handler lat_ops = {
+ .wakeup_event = latency_wakeup_event,
+ .switch_event = latency_switch_event,
+ .fork_event = latency_fork_event,
+};
+
+static void output_lat_thread(struct thread_latency *lat)
+{
+ struct lat_snapshot *shot;
+ int count = 0;
+ int i;
+ int ret;
+ u64 max = 0, avg;
+ u64 total = 0, delta;
+
+ list_for_each_entry(shot, &lat->snapshot_list, list) {
+ if (shot->state != THREAD_SCHED_IN)
+ continue;
+
+ count++;
+
+ delta = shot->sched_in_time - shot->wake_up_time;
+ if (delta > max)
+ max = delta;
+ total += delta;
+ }
+
+ if (!count)
+ return;
+
+ ret = printf("%s", lat->thread->comm);
+
+ for (i = 0; i < 25 - ret; i++)
+ printf(" ");
+
+ avg = total / count;
+
+ printf("%5d %10llu %10llu %10llu\n", count, total, avg, max);
+}
+
+static void output_lat_results(void)
+{
+ struct rb_node *next;
+
+ printf(" Tasks");
+ printf(" count");
+ printf(" total");
+ printf(" avg");
+ printf(" max\n\n");
+
+ next = rb_first(&lat_snapshot_root);
+
+ while (next) {
+ struct thread_latency *lat;
+
+ lat = rb_entry(next, struct thread_latency, node);
+ output_lat_thread(lat);
+ next = rb_next(next);
+ }
+}
static struct trace_sched_handler *trace_handler;
@@ -1154,6 +1418,8 @@ static const struct option options[] = {
"dump raw trace in ASCII"),
OPT_BOOLEAN('r', "replay", &replay_mode,
"replay sched behaviour from traces"),
+ OPT_BOOLEAN('l', "latency", &lat_mode,
+ "measure various latencies"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_END()
@@ -1180,22 +1446,30 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
if (replay_mode)
trace_handler = &replay_ops;
- else /* We may need a default subcommand */
+ else if (lat_mode)
+ trace_handler = &lat_ops;
+ else /* We may need a default subcommand (perf trace?) */
die("Please select a sub command (-r)\n");
- calibrate_run_measurement_overhead();
- calibrate_sleep_measurement_overhead();
+ if (replay_mode) {
+ calibrate_run_measurement_overhead();
+ calibrate_sleep_measurement_overhead();
- test_calibrations();
+ test_calibrations();
- parse_trace();
- print_task_traces();
- add_cross_task_wakeups();
+ parse_trace();
+ print_task_traces();
+ add_cross_task_wakeups();
- create_tasks();
- printf("------------------------------------------------------------\n");
- for (i = 0; i < nr_iterations; i++)
- run_one_test();
+ create_tasks();
+ printf("------------------------------------------------------------\n");
+ for (i = 0; i < nr_iterations; i++)
+ run_one_test();
+ } else if (lat_mode) {
+ setup_pager();
+ __cmd_sched();
+ output_lat_results();
+ }
return 0;
}