From 82f67cd9fca8c8762c15ba7ed0d5747588c1e221 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 Feb 2007 01:28:13 -0800 Subject: [PATCH] Add debugging feature /proc/timer_stat Add /proc/timer_stats support: debugging feature to profile timer expiration. Both the starting site, process/PID and the expiration function is captured. This allows the quick identification of timer event sources in a system. Sample output: # echo 1 > /proc/timer_stats # cat /proc/timer_stats Timer Stats Version: v0.1 Sample period: 4.010 s 24, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 11, 0 swapper sk_reset_timer (tcp_delack_timer) 6, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 17, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 4, 2050 pcscd do_nanosleep (hrtimer_wakeup) 5, 4179 sshd sk_reset_timer (tcp_write_timer) 4, 2248 yum-updatesd schedule_timeout (process_timeout) 18, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 3, 0 swapper sk_reset_timer (tcp_delack_timer) 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) 2, 1 swapper e1000_up (e1000_watchdog) 1, 1 init schedule_timeout (process_timeout) 100 total events, 25.24 events/sec [ cleanups and hrtimers support from Thomas Gleixner ] [bunk@stusta.de: nr_entries can become static] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: john stultz Cc: Roman Zippel Cc: Andi Kleen Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timer_stats.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 kernel/time/timer_stats.c (limited to 'kernel/time/timer_stats.c') diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c new file mode 100644 index 00000000000..1bc4882e28e --- /dev/null +++ b/kernel/time/timer_stats.c @@ -0,0 +1,411 @@ +/* + * kernel/time/timer_stats.c + * + * Collect timer usage statistics. + * + * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar + * Copyright(C) 2006 Timesys Corp., Thomas Gleixner + * + * timer_stats is based on timer_top, a similar functionality which was part of + * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the + * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based + * on dynamic allocation of the statistics entries and linear search based + * lookup combined with a global lock, rather than the static array, hash + * and per-CPU locking which is used by timer_stats. It was written for the + * pre hrtimer kernel code and therefore did not take hrtimers into account. + * Nevertheless it provided the base for the timer_stats implementation and + * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks + * for this effort. + * + * timer_top.c is + * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus + * Written by Daniel Petrini + * timer_top.c was released under the GNU General Public License version 2 + * + * We export the addresses and counting of timer functions being called, + * the pid and cmdline from the owner process if applicable. + * + * Start/stop data collection: + * # echo 1[0] >/proc/timer_stats + * + * Display the information collected so far: + * # cat /proc/timer_stats + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +/* + * This is our basic unit of interest: a timer expiry event identified + * by the timer, its start/expire functions and the PID of the task that + * started the timer. We count the number of times an event happens: + */ +struct entry { + /* + * Hash list: + */ + struct entry *next; + + /* + * Hash keys: + */ + void *timer; + void *start_func; + void *expire_func; + pid_t pid; + + /* + * Number of timeout events: + */ + unsigned long count; + + /* + * We save the command-line string to preserve + * this information past task exit: + */ + char comm[TASK_COMM_LEN + 1]; + +} ____cacheline_aligned_in_smp; + +/* + * Spinlock protecting the tables - not taken during lookup: + */ +static DEFINE_SPINLOCK(table_lock); + +/* + * Per-CPU lookup locks for fast hash lookup: + */ +static DEFINE_PER_CPU(spinlock_t, lookup_lock); + +/* + * Mutex to serialize state changes with show-stats activities: + */ +static DEFINE_MUTEX(show_mutex); + +/* + * Collection status, active/inactive: + */ +static int __read_mostly active; + +/* + * Beginning/end timestamps of measurement: + */ +static ktime_t time_start, time_stop; + +/* + * tstat entry structs only get allocated while collection is + * active and never freed during that time - this simplifies + * things quite a bit. + * + * They get freed when a new collection period is started. + */ +#define MAX_ENTRIES_BITS 10 +#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) + +static unsigned long nr_entries; +static struct entry entries[MAX_ENTRIES]; + +static atomic_t overflow_count; + +static void reset_entries(void) +{ + nr_entries = 0; + memset(entries, 0, sizeof(entries)); + atomic_set(&overflow_count, 0); +} + +static struct entry *alloc_entry(void) +{ + if (nr_entries >= MAX_ENTRIES) + return NULL; + + return entries + nr_entries++; +} + +/* + * The entries are in a hash-table, for fast lookup: + */ +#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) +#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) +#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) + +#define __tstat_hashfn(entry) \ + (((unsigned long)(entry)->timer ^ \ + (unsigned long)(entry)->start_func ^ \ + (unsigned long)(entry)->expire_func ^ \ + (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) + +#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) + +static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; + +static int match_entries(struct entry *entry1, struct entry *entry2) +{ + return entry1->timer == entry2->timer && + entry1->start_func == entry2->start_func && + entry1->expire_func == entry2->expire_func && + entry1->pid == entry2->pid; +} + +/* + * Look up whether an entry matching this item is present + * in the hash already. Must be called with irqs off and the + * lookup lock held: + */ +static struct entry *tstat_lookup(struct entry *entry, char *comm) +{ + struct entry **head, *curr, *prev; + + head = tstat_hashentry(entry); + curr = *head; + + /* + * The fastpath is when the entry is already hashed, + * we do this with the lookup lock held, but with the + * table lock not held: + */ + while (curr) { + if (match_entries(curr, entry)) + return curr; + + curr = curr->next; + } + /* + * Slowpath: allocate, set up and link a new hash entry: + */ + prev = NULL; + curr = *head; + + spin_lock(&table_lock); + /* + * Make sure we have not raced with another CPU: + */ + while (curr) { + if (match_entries(curr, entry)) + goto out_unlock; + + prev = curr; + curr = curr->next; + } + + curr = alloc_entry(); + if (curr) { + *curr = *entry; + curr->count = 0; + memcpy(curr->comm, comm, TASK_COMM_LEN); + if (prev) + prev->next = curr; + else + *head = curr; + curr->next = NULL; + } + out_unlock: + spin_unlock(&table_lock); + + return curr; +} + +/** + * timer_stats_update_stats - Update the statistics for a timer. + * @timer: pointer to either a timer_list or a hrtimer + * @pid: the pid of the task which set up the timer + * @startf: pointer to the function which did the timer setup + * @timerf: pointer to the timer callback function of the timer + * @comm: name of the process which set up the timer + * + * When the timer is already registered, then the event counter is + * incremented. Otherwise the timer is registered in a free slot. + */ +void timer_stats_update_stats(void *timer, pid_t pid, void *startf, + void *timerf, char * comm) +{ + /* + * It doesnt matter which lock we take: + */ + spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id()); + struct entry *entry, input; + unsigned long flags; + + input.timer = timer; + input.start_func = startf; + input.expire_func = timerf; + input.pid = pid; + + spin_lock_irqsave(lock, flags); + if (!active) + goto out_unlock; + + entry = tstat_lookup(&input, comm); + if (likely(entry)) + entry->count++; + else + atomic_inc(&overflow_count); + + out_unlock: + spin_unlock_irqrestore(lock, flags); +} + +static void print_name_offset(struct seq_file *m, unsigned long addr) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); + if (sym_name) + seq_printf(m, "%s", sym_name); + else + seq_printf(m, "<%p>", (void *)addr); +} + +static int tstats_show(struct seq_file *m, void *v) +{ + struct timespec period; + struct entry *entry; + unsigned long ms; + long events = 0; + ktime_t time; + int i; + + mutex_lock(&show_mutex); + /* + * If still active then calculate up to now: + */ + if (active) + time_stop = ktime_get(); + + time = ktime_sub(time_stop, time_start); + + period = ktime_to_timespec(time); + ms = period.tv_nsec / 1000000; + + seq_puts(m, "Timer Stats Version: v0.1\n"); + seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); + if (atomic_read(&overflow_count)) + seq_printf(m, "Overflow: %d entries\n", + atomic_read(&overflow_count)); + + for (i = 0; i < nr_entries; i++) { + entry = entries + i; + seq_printf(m, "%4lu, %5d %-16s ", + entry->count, entry->pid, entry->comm); + + print_name_offset(m, (unsigned long)entry->start_func); + seq_puts(m, " ("); + print_name_offset(m, (unsigned long)entry->expire_func); + seq_puts(m, ")\n"); + + events += entry->count; + } + + ms += period.tv_sec * 1000; + if (!ms) + ms = 1; + + if (events && period.tv_sec) + seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, + events / period.tv_sec, events * 1000 / ms); + else + seq_printf(m, "%ld total events\n", events); + + mutex_unlock(&show_mutex); + + return 0; +} + +/* + * After a state change, make sure all concurrent lookup/update + * activities have stopped: + */ +static void sync_access(void) +{ + unsigned long flags; + int cpu; + + for_each_online_cpu(cpu) { + spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); + /* nothing */ + spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); + } +} + +static ssize_t tstats_write(struct file *file, const char __user *buf, + size_t count, loff_t *offs) +{ + char ctl[2]; + + if (count != 2 || *offs) + return -EINVAL; + + if (copy_from_user(ctl, buf, count)) + return -EFAULT; + + mutex_lock(&show_mutex); + switch (ctl[0]) { + case '0': + if (active) { + active = 0; + time_stop = ktime_get(); + sync_access(); + } + break; + case '1': + if (!active) { + reset_entries(); + time_start = ktime_get(); + active = 1; + } + break; + default: + count = -EINVAL; + } + mutex_unlock(&show_mutex); + + return count; +} + +static int tstats_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, tstats_show, NULL); +} + +static struct file_operations tstats_fops = { + .open = tstats_open, + .read = seq_read, + .write = tstats_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +void __init init_timer_stats(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + spin_lock_init(&per_cpu(lookup_lock, cpu)); +} + +static int __init init_tstats_procfs(void) +{ + struct proc_dir_entry *pe; + + pe = create_proc_entry("timer_stats", 0644, NULL); + if (!pe) + return -ENOMEM; + + pe->proc_fops = &tstats_fops; + + return 0; +} +__initcall(init_tstats_procfs); -- cgit v1.2.3