From 2b022e3d4bf9885f781221c59d86283a2cdfc2ed Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:48:59 +0800 Subject: timers: Add tracepoints for timer_list timers Add tracepoints which cover the timer life cycle. The tracepoints are integrated with the already existing debug_object debug points as far as possible. Based on patches from Mathieu: http://marc.info/?l=linux-kernel&m=123791201816247&w=2 and Anton: http://marc.info/?l=linux-kernel&m=124331396919301&w=2 [ tglx: Fixed timeout value in timer_start tracepoint, massaged comments and made the printk's more readable ] Signed-off-by: Xiao Guangrong Cc: Anton Blanchard Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8A9B.3040201@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 137 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 include/trace/events/timer.h (limited to 'include/trace/events') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h new file mode 100644 index 00000000000..725892a93b4 --- /dev/null +++ b/include/trace/events/timer.h @@ -0,0 +1,137 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timer + +#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMER_H + +#include +#include + +/** + * timer_init - called when the timer is initialized + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_init, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_start - called when the timer is started + * @timer: pointer to struct timer_list + * @expires: the timers expiry time + */ +TRACE_EVENT(timer_start, + + TP_PROTO(struct timer_list *timer, unsigned long expires), + + TP_ARGS(timer, expires), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( unsigned long, expires ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = expires; + __entry->now = jiffies; + ), + + TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + __entry->timer, __entry->function, __entry->expires, + (long)__entry->expires - __entry->now) +); + +/** + * timer_expire_entry - called immediately before the timer callback + * @timer: pointer to struct timer_list + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(timer_expire_entry, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = jiffies; + ), + + TP_printk("timer %p: now %lu", __entry->timer, __entry->now) +); + +/** + * timer_expire_exit - called immediately after the timer callback returns + * @timer: pointer to struct timer_list + * + * When used in combination with the timer_expire_entry tracepoint we can + * determine the runtime of the timer callback function. + * + * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * be invalid. We solely track the pointer. + */ +TRACE_EVENT(timer_expire_exit, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field(void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_cancel - called when the timer is canceled + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_cancel, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +#endif /* _TRACE_TIMER_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From c6a2a1770245f654f35f60e1458d4356680f9519 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:51:23 +0800 Subject: hrtimer: Add tracepoint for hrtimers Add tracepoints which cover the life cycle of a hrtimer. The tracepoints are integrated with the already existing debug_object debug points as far as possible. [ tglx: Fixed comments, made output conistent, easier to read and parse. Fixed output for 32bit archs which do not use the scalar representation of ktime_t. Hand current time to trace_hrtimer_expiry_entry instead of calling get_time() inside of the trace assignment. ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B2B.5020908@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 139 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 725892a93b4..df3c07fa0cb 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -5,6 +5,7 @@ #define _TRACE_TIMER_H #include +#include #include /** @@ -131,6 +132,144 @@ TRACE_EVENT(timer_cancel, TP_printk("timer %p", __entry->timer) ); +/** + * hrtimer_init - called when the hrtimer is initialized + * @timer: pointer to struct hrtimer + * @clockid: the hrtimers clock + * @mode: the hrtimers mode + */ +TRACE_EVENT(hrtimer_init, + + TP_PROTO(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode), + + TP_ARGS(timer, clockid, mode), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( clockid_t, clockid ) + __field( enum hrtimer_mode, mode ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->clockid = clockid; + __entry->mode = mode; + ), + + TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + __entry->clockid == CLOCK_REALTIME ? + "CLOCK_REALTIME" : "CLOCK_MONOTONIC", + __entry->mode == HRTIMER_MODE_ABS ? + "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") +); + +/** + * hrtimer_start - called when the hrtimer is started + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_start, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( s64, expires ) + __field( s64, softexpires ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = hrtimer_get_expires(timer).tv64; + __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + ), + + TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", + __entry->timer, __entry->function, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->expires }), + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->softexpires })) +); + +/** + * htimmer_expire_entry - called immediately before the hrtimer callback + * @timer: pointer to struct hrtimer + * @now: pointer to variable which contains current time of the + * timers base. + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(hrtimer_expire_entry, + + TP_PROTO(struct hrtimer *timer, ktime_t *now), + + TP_ARGS(timer, now), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( s64, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = now->tv64; + ), + + TP_printk("hrtimer %p, now %llu", __entry->timer, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->now })) + ); + +/** + * hrtimer_expire_exit - called immediately after the hrtimer callback returns + * @timer: pointer to struct hrtimer + * + * When used in combination with the hrtimer_expire_entry tracepoint we can + * determine the runtime of the callback function. + */ +TRACE_EVENT(hrtimer_expire_exit, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + +/** + * hrtimer_cancel - called when the hrtimer is canceled + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_cancel, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 3f0a525ebf4b8ef041a332bbe4a73aee94bb064b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:52:30 +0800 Subject: itimers: Add tracepoints for itimer Add tracepoints for all itimer variants: ITIMER_REAL, ITIMER_VIRTUAL and ITIMER_PROF. [ tglx: Fixed comments and made the output more readable, parseable and consistent. Replaced pid_vnr by pid_nr because the hrtimer callback can happen in any namespace ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B6E.2010109@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index df3c07fa0cb..1844c48d640 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -270,6 +270,72 @@ TRACE_EVENT(hrtimer_cancel, TP_printk("hrtimer %p", __entry->timer) ); +/** + * itimer_state - called when itimer is started or canceled + * @which: name of the interval timer + * @value: the itimers value, itimer is canceled if value->it_value is + * zero, otherwise it is started + * @expires: the itimers expiry time + */ +TRACE_EVENT(itimer_state, + + TP_PROTO(int which, const struct itimerval *const value, + cputime_t expires), + + TP_ARGS(which, value, expires), + + TP_STRUCT__entry( + __field( int, which ) + __field( cputime_t, expires ) + __field( long, value_sec ) + __field( long, value_usec ) + __field( long, interval_sec ) + __field( long, interval_usec ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->expires = expires; + __entry->value_sec = value->it_value.tv_sec; + __entry->value_usec = value->it_value.tv_usec; + __entry->interval_sec = value->it_interval.tv_sec; + __entry->interval_usec = value->it_interval.tv_usec; + ), + + TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + __entry->which, __entry->expires, + __entry->value_sec, __entry->value_usec, + __entry->interval_sec, __entry->interval_usec) +); + +/** + * itimer_expire - called when itimer expires + * @which: type of the interval timer + * @pid: pid of the process which owns the timer + * @now: current time, used to calculate the latency of itimer + */ +TRACE_EVENT(itimer_expire, + + TP_PROTO(int which, struct pid *pid, cputime_t now), + + TP_ARGS(which, pid, now), + + TP_STRUCT__entry( + __field( int , which ) + __field( pid_t, pid ) + __field( cputime_t, now ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->now = now; + __entry->pid = pid_nr(pid); + ), + + TP_printk("which %d, pid %d, now %lu", __entry->which, + (int) __entry->pid, __entry->now) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 98a56ab382079f777e261e14512cbd4fb2107af4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:48:28 -0400 Subject: ext4: Fix spelling typo in the trace format for trace_ext4_da_writepages() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8d433c4e370..15051d2d121 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -243,7 +243,7 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, -- cgit v1.2.3 From b3a3ca8ca0c3c29abc5b2bfe94bb14f3f4590df9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Aug 2009 23:13:11 -0400 Subject: ext4: Add new tracepoint: trace_ext4_da_write_pages() Add a new tracepoint which shows the pages that will be written using write_cache_pages() by ext4_da_writepages(). Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 15051d2d121..dd43399288e 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -251,6 +251,40 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic) ); +TRACE_EVENT(ext4_da_write_pages, + TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), + + TP_ARGS(inode, mpd), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, b_blocknr ) + __field( __u32, b_size ) + __field( __u32, b_state ) + __field( unsigned long, first_page ) + __field( int, io_done ) + __field( int, pages_written ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->b_blocknr = mpd->b_blocknr; + __entry->b_size = mpd->b_size; + __entry->b_state = mpd->b_state; + __entry->first_page = mpd->first_page; + __entry->io_done = mpd->io_done; + __entry->pages_written = mpd->pages_written; + ), + + TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, + __entry->b_blocknr, __entry->b_size, + __entry->b_state, __entry->first_page, + __entry->io_done, __entry->pages_written) +); + TRACE_EVENT(ext4_da_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), -- cgit v1.2.3 From a3710fd1ee8cd542c5de63cf2c39f8912031f867 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:50:18 -0400 Subject: ext4: fix tracepoint format string warnings Unlike on some other architectures ino_t is an unsigned int on s390. So add an explicit cast to avoid lots of compile warnings: In file included from include/trace/ftrace.h:285, from include/trace/define_trace.h:61, from include/trace/events/ext4.h:711, from fs/ext4/super.c:50: include/trace/events/ext4.h: In function 'ftrace_raw_output_ext4_free_inode': include/trace/events/ext4.h:12: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'ino_t' Signed-off-by: Heiko Carstens Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 75 ++++++++++++++++++++++++--------------------- include/trace/events/jbd2.h | 2 +- 2 files changed, 41 insertions(+), 36 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index dd43399288e..68b53c7ef8a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -33,8 +33,8 @@ TRACE_EVENT(ext4_free_inode, ), TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, - __entry->uid, __entry->gid, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, __entry->uid, __entry->gid, (unsigned long long) __entry->blocks) ); @@ -56,7 +56,8 @@ TRACE_EVENT(ext4_request_inode, ), TP_printk("dev %s dir %lu mode %d", - jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, + __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, @@ -79,7 +80,8 @@ TRACE_EVENT(ext4_allocate_inode, ), TP_printk("dev %s ino %lu dir %lu mode %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_write_begin, @@ -106,8 +108,8 @@ TRACE_EVENT(ext4_write_begin, ), TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->flags) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->flags) ); TRACE_EVENT(ext4_ordered_write_end, @@ -133,8 +135,8 @@ TRACE_EVENT(ext4_ordered_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_writeback_write_end, @@ -160,8 +162,8 @@ TRACE_EVENT(ext4_writeback_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_journalled_write_end, @@ -186,8 +188,8 @@ TRACE_EVENT(ext4_journalled_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_writepage, @@ -209,7 +211,8 @@ TRACE_EVENT(ext4_writepage, ), TP_printk("dev %s ino %lu page_index %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->index) ); TRACE_EVENT(ext4_da_writepages, @@ -244,7 +247,8 @@ TRACE_EVENT(ext4_da_writepages, ), TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, + jbd2_dev_to_name(__entry->dev), + (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, __entry->for_kupdate, __entry->for_reclaim, @@ -279,7 +283,7 @@ TRACE_EVENT(ext4_da_write_pages, ), TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->b_blocknr, __entry->b_size, __entry->b_state, __entry->first_page, __entry->io_done, __entry->pages_written) @@ -314,7 +318,8 @@ TRACE_EVENT(ext4_da_writepages_result, ), TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret, + jbd2_dev_to_name(__entry->dev), + (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->encountered_congestion, __entry->more_io, __entry->no_nrwrite_index_update) @@ -343,8 +348,8 @@ TRACE_EVENT(ext4_da_write_begin, ), TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->flags) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->flags) ); TRACE_EVENT(ext4_da_write_end, @@ -370,8 +375,8 @@ TRACE_EVENT(ext4_da_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_discard_blocks, @@ -421,8 +426,8 @@ TRACE_EVENT(ext4_mb_new_inode_pa, ), TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, - __entry->pa_len, __entry->pa_lstart) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); TRACE_EVENT(ext4_mb_new_group_pa, @@ -449,8 +454,8 @@ TRACE_EVENT(ext4_mb_new_group_pa, ), TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, - __entry->pa_len, __entry->pa_lstart) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); TRACE_EVENT(ext4_mb_release_inode_pa, @@ -476,8 +481,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_printk("dev %s ino %lu block %llu count %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, - __entry->count) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, @@ -522,7 +527,7 @@ TRACE_EVENT(ext4_discard_preallocations, ), TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mb_discard_preallocations, @@ -577,8 +582,8 @@ TRACE_EVENT(ext4_request_blocks, ), TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, - __entry->len, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->flags, __entry->len, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, (unsigned long long) __entry->lleft, @@ -621,8 +626,8 @@ TRACE_EVENT(ext4_allocate_blocks, ), TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, - __entry->len, __entry->block, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->flags, __entry->len, __entry->block, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, (unsigned long long) __entry->lleft, @@ -655,8 +660,8 @@ TRACE_EVENT(ext4_free_blocks, ), TP_printk("dev %s ino %lu block %llu count %lu metadata %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, - __entry->count, __entry->metadata) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->block, __entry->count, __entry->metadata) ); TRACE_EVENT(ext4_sync_file, @@ -679,8 +684,8 @@ TRACE_EVENT(ext4_sync_file, ), TP_printk("dev %s ino %ld parent %ld datasync %d ", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent, - __entry->datasync) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + (unsigned long) __entry->parent, __entry->datasync) ); TRACE_EVENT(ext4_sync_fs, diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 10813fa0c8d..b851f0b4701 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -159,7 +159,7 @@ TRACE_EVENT(jbd2_submit_inode_data, ), TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) ); #endif /* _TRACE_JBD2_H */ -- cgit v1.2.3 From f977bb4937857994312fff4f9c2cad336a36a932 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Sep 2009 18:15:54 +0200 Subject: perf_counter, sched: Add sched_stat_runtime tracepoint This allows more precise tracking of how the scheduler accounts (and acts upon) a task having spent N nanoseconds of CPU time. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b48f1ad7c94..4069c43f418 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -379,6 +379,39 @@ TRACE_EVENT(sched_stat_wait, (unsigned long long)__entry->delay) ); +/* + * Tracepoint for accounting runtime (time the task is executing + * on a CPU). + */ +TRACE_EVENT(sched_stat_runtime, + + TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), + + TP_ARGS(tsk, runtime, vruntime), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, runtime ) + __field( u64, vruntime ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->runtime = runtime; + __entry->vruntime = vruntime; + ) + TP_perf_assign( + __perf_count(runtime); + ), + + TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->runtime, + (unsigned long long)__entry->vruntime) +); + /* * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). -- cgit v1.2.3 From 3661d28615ea580c1db02a972fd4d3898df1cb01 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Sep 2009 22:59:50 -0400 Subject: ext4: Fix include/trace/events/ext4.h to work with Systemtap Using relative pathnames in #include statements interacts badly with SystemTap, since the fs/ext4/*.h header files are not packaged up as part of a distribution kernel's header files. Since systemtap doesn't use TP_fast_assign(), we can use a blind structure definition and then make sure the needed header files are defined before the ext4 source files #include the trace/events/ext4.h header file. https://bugzilla.redhat.com/show_bug.cgi?id=512478 Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 68b53c7ef8a..6fe6ce9ee07 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -5,10 +5,12 @@ #define _TRACE_EXT4_H #include -#include "../../../fs/ext4/ext4.h" -#include "../../../fs/ext4/mballoc.h" #include +struct ext4_allocation_context; +struct ext4_allocation_request; +struct ext4_prealloc_space; + TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), -- cgit v1.2.3 From fb40ba0d98968bc3454731360363d725b4f1064c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 16 Sep 2009 19:30:40 -0400 Subject: ext4: Add a tracepoint for ext4_alloc_da_blocks() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6fe6ce9ee07..c1bd8f1e8b9 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -10,6 +10,9 @@ struct ext4_allocation_context; struct ext4_allocation_request; struct ext4_prealloc_space; +struct ext4_inode_info; + +#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), @@ -710,6 +713,30 @@ TRACE_EVENT(ext4_sync_fs, __entry->wait) ); +TRACE_EVENT(ext4_alloc_da_blocks, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( unsigned int, data_blocks ) + __field( unsigned int, meta_blocks ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; + __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; + ), + + TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->data_blocks, __entry->meta_blocks) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 6161352142d5fed4cd753b32e5ccde66e705b14e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 17 Sep 2009 16:11:28 +0200 Subject: tracing, perf: Convert the power tracer into an event tracer This patch converts the existing power tracer into an event tracer, so that power events (C states and frequency changes) can be tracked via "perf". This also removes the perl script that was used to demo the tracer; its functionality is being replaced entirely with timechart. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090912130542.6d314860@infradead.org> Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 81 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 include/trace/events/power.h (limited to 'include/trace/events') diff --git a/include/trace/events/power.h b/include/trace/events/power.h new file mode 100644 index 00000000000..ea6d579261a --- /dev/null +++ b/include/trace/events/power.h @@ -0,0 +1,81 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power + +#if !defined(_TRACE_POWER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_H + +#include +#include + +#ifndef _TRACE_POWER_ENUM_ +#define _TRACE_POWER_ENUM_ +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; +#endif + + + +TRACE_EVENT(power_start, + + TP_PROTO(unsigned int type, unsigned int state), + + TP_ARGS(type, state), + + TP_STRUCT__entry( + __field( u64, type ) + __field( u64, state ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->state = state; + ), + + TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) +); + +TRACE_EVENT(power_end, + + TP_PROTO(int dummy), + + TP_ARGS(dummy), + + TP_STRUCT__entry( + __field( u64, dummy ) + ), + + TP_fast_assign( + __entry->dummy = 0xffff; + ), + + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + +); + + +TRACE_EVENT(power_frequency, + + TP_PROTO(unsigned int type, unsigned int state), + + TP_ARGS(type, state), + + TP_STRUCT__entry( + __field( u64, type ) + __field( u64, state ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->state = state; + ), + + TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) +); + +#endif /* _TRACE_POWER_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 4b4f278c030aa4b6ee0915f396e9a9478d92d610 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:41 -0700 Subject: tracing, page-allocator: add trace events for page allocation and page freeing This patch adds trace events for the allocation and freeing of pages, including the freeing of pagevecs. Using the events, it will be known what struct page and pfns are being allocated and freed and what the call site was in many cases. The page alloc tracepoints be used as an indicator as to whether the workload was heavily dependant on the page allocator or not. You can make a guess based on vmstat but you can't get a per-process breakdown. Depending on the call path, the call_site for page allocation may be __get_free_pages() instead of a useful callsite. Instead of passing down a return address similar to slab debugging, the user should enable the stacktrace and seg-addr options to get a proper stack trace. The pagevec free tracepoint has a different usecase. It can be used to get a idea of how many pages are being dumped off the LRU and whether it is kswapd doing the work or a process doing direct reclaim. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 74 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 1493c541f9c..0d358a0d45c 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -225,6 +225,80 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); + +TRACE_EVENT(mm_page_free_direct, + + TP_PROTO(struct page *page, unsigned int order), + + TP_ARGS(page, order), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + ), + + TP_printk("page=%p pfn=%lu order=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order) +); + +TRACE_EVENT(mm_pagevec_free, + + TP_PROTO(struct page *page, int cold), + + TP_ARGS(page, cold), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, cold ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->cold = cold; + ), + + TP_printk("page=%p pfn=%lu order=0 cold=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->cold) +); + +TRACE_EVENT(mm_page_alloc, + + TP_PROTO(struct page *page, unsigned int order, + gfp_t gfp_flags, int migratetype), + + TP_ARGS(page, order, gfp_flags, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + __field( gfp_t, gfp_flags ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->gfp_flags = gfp_flags; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype, + show_gfp_flags(__entry->gfp_flags)) +); + #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ -- cgit v1.2.3 From e0fff1bd12469c45dab088e353d8882761387bb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:42 -0700 Subject: tracing, page-allocator: add trace events for anti-fragmentation falling back to other migratetypes Fragmentation avoidance depends on being able to use free pages from lists of the appropriate migrate type. In the event this is not possible, __rmqueue_fallback() selects a different list and in some circumstances change the migratetype of the pageblock. Simplistically, the more times this event occurs, the more likely that fragmentation will be a problem later for hugepage allocation at least but there are other considerations such as the order of page being split to satisfy the allocation. This patch adds a trace event for __rmqueue_fallback() that reports what page is being used for the fallback, the orders of relevant pages, the desired migratetype and the migratetype of the lists being used, whether the pageblock changed type and whether this event is important with respect to fragmentation avoidance or not. This information can be used to help analyse fragmentation avoidance and help decide whether min_free_kbytes should be increased or not. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 0d358a0d45c..aae16ee1760 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -299,6 +299,44 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); +TRACE_EVENT(mm_page_alloc_extfrag, + + TP_PROTO(struct page *page, + int alloc_order, int fallback_order, + int alloc_migratetype, int fallback_migratetype), + + TP_ARGS(page, + alloc_order, fallback_order, + alloc_migratetype, fallback_migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, alloc_order ) + __field( int, fallback_order ) + __field( int, alloc_migratetype ) + __field( int, fallback_migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->alloc_order = alloc_order; + __entry->fallback_order = fallback_order; + __entry->alloc_migratetype = alloc_migratetype; + __entry->fallback_migratetype = fallback_migratetype; + ), + + TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->alloc_order, + __entry->fallback_order, + pageblock_order, + __entry->alloc_migratetype, + __entry->fallback_migratetype, + __entry->fallback_order < pageblock_order, + __entry->alloc_migratetype == __entry->fallback_migratetype) +); + #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 0d3d062a6e289e065bd0aa537a6806a1806bf8aa Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:44 -0700 Subject: tracing, page-allocator: add trace event for page traffic related to the buddy lists The page allocation trace event reports that a page was successfully allocated but it does not specify where it came from. When analysing performance, it can be important to distinguish between pages coming from the per-cpu allocator and pages coming from the buddy lists as the latter requires the zone lock to the taken and more data structures to be examined. This patch adds a trace event for __rmqueue reporting when a page is being allocated from the buddy lists. It distinguishes between being called to refill the per-cpu lists or whether it is a high-order allocation. Similarly, this patch adds an event to catch when the PCP lists are being drained a little and pages are going back to the buddy lists. This is trickier to draw conclusions from but high activity on those events could explain why there were a large number of cache misses on a page-allocator-intensive workload. The coalescing and splitting of buddies involves a lot of writing of page metadata and cache line bounces not to mention the acquisition of an interrupt-safe lock necessary to enter this path. [akpm@linux-foundation.org: fix build] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aae16ee1760..eaf46bdd18a 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -299,6 +299,57 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); +TRACE_EVENT(mm_page_alloc_zone_locked, + + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype, + __entry->order == 0) +); + +TRACE_EVENT(mm_page_pcpu_drain, + + TP_PROTO(struct page *page, int order, int migratetype), + + TP_ARGS(page, order, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype) +); + TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, -- cgit v1.2.3 From 9961079348d43dddb1f21118c17f054f63bbaa05 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 22 Sep 2009 12:40:33 +1000 Subject: tracing/workqueue: Use %pf in workqueue trace events Using %pf instead of %pF supresses printing of the function offset which will always be 0 in the case of worklet functions. Signed-off-by: Anton Blanchard Cc: KOSAKI Motohiro Cc: Li Zefan Cc: Zhaolei Cc: Lai Jiangshan Cc: Ingo Molnar Cc: Steven Rostedt LKML-Reference: <20090922024033.GB31801@kryten> Signed-off-by: Frederic Weisbecker --- include/trace/events/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index fcfd9a1e4b9..e4612dbd7ba 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -26,7 +26,7 @@ TRACE_EVENT(workqueue_insertion, __entry->func = work->func; ), - TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, __entry->thread_pid, __entry->func) ); @@ -48,7 +48,7 @@ TRACE_EVENT(workqueue_execution, __entry->func = work->func; ), - TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, __entry->thread_pid, __entry->func) ); -- cgit v1.2.3 From 55138e0bc29c0751e2152df9ad35deea542f29b3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 29 Sep 2009 13:31:31 -0400 Subject: ext4: Adjust ext4_da_writepages() to write out larger contiguous chunks Work around problems in the writeback code to force out writebacks in larger chunks than just 4mb, which is just too small. This also works around limitations in the ext4 block allocator, which can't allocate more than 2048 blocks at a time. So we need to defeat the round-robin characteristics of the writeback code and try to write out as many blocks in one inode before allowing the writeback code to move on to another inode. We add a a new per-filesystem tunable, max_writeback_mb_bump, which caps this to a default of 128mb per inode. Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index c1bd8f1e8b9..7c6bbb7198a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -236,6 +236,7 @@ TRACE_EVENT(ext4_da_writepages, __field( char, for_kupdate ) __field( char, for_reclaim ) __field( char, range_cyclic ) + __field( pgoff_t, writeback_index ) ), TP_fast_assign( @@ -249,15 +250,17 @@ TRACE_EVENT(ext4_da_writepages, __entry->for_kupdate = wbc->for_kupdate; __entry->for_reclaim = wbc->for_reclaim; __entry->range_cyclic = wbc->range_cyclic; + __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, __entry->for_kupdate, __entry->for_reclaim, - __entry->range_cyclic) + __entry->range_cyclic, + (unsigned long) __entry->writeback_index) ); TRACE_EVENT(ext4_da_write_pages, @@ -309,6 +312,7 @@ TRACE_EVENT(ext4_da_writepages_result, __field( char, encountered_congestion ) __field( char, more_io ) __field( char, no_nrwrite_index_update ) + __field( pgoff_t, writeback_index ) ), TP_fast_assign( @@ -320,14 +324,16 @@ TRACE_EVENT(ext4_da_writepages_result, __entry->encountered_congestion = wbc->encountered_congestion; __entry->more_io = wbc->more_io; __entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update; + __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", + TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d writeback_index %lu", jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->encountered_congestion, __entry->more_io, - __entry->no_nrwrite_index_update) + __entry->no_nrwrite_index_update, + (unsigned long) __entry->writeback_index) ); TRACE_EVENT(ext4_da_write_begin, -- cgit v1.2.3 From 296c355cd6443d89fa251885a8d78778fe111dc4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 30 Sep 2009 00:32:42 -0400 Subject: ext4: Use tracepoints for mb_history trace file The /proc/fs/ext4//mb_history was maintained manually, and had a number of problems: it required a largish amount of memory to be allocated for each ext4 filesystem, and the s_mb_history_lock introduced a CPU contention problem. By ripping out the mb_history code and replacing it with ftrace tracepoints, and we get more functionality: timestamps, event filtering, the ability to correlate mballoc history with other ext4 tracepoints, etc. Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 163 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 7c6bbb7198a..b8320256dc5 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -743,6 +743,169 @@ TRACE_EVENT(ext4_alloc_da_blocks, __entry->data_blocks, __entry->meta_blocks) ); +TRACE_EVENT(ext4_mballoc_alloc, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u16, found ) + __field( __u16, groups ) + __field( __u16, buddy ) + __field( __u16, flags ) + __field( __u16, tail ) + __field( __u8, cr ) + __field( __u32, orig_logical ) + __field( int, orig_start ) + __field( __u32, orig_group ) + __field( int, orig_len ) + __field( __u32, goal_logical ) + __field( int, goal_start ) + __field( __u32, goal_group ) + __field( int, goal_len ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->found = ac->ac_found; + __entry->flags = ac->ac_flags; + __entry->groups = ac->ac_groups_scanned; + __entry->buddy = ac->ac_buddy; + __entry->tail = ac->ac_tail; + __entry->cr = ac->ac_criteria; + __entry->orig_logical = ac->ac_o_ex.fe_logical; + __entry->orig_start = ac->ac_o_ex.fe_start; + __entry->orig_group = ac->ac_o_ex.fe_group; + __entry->orig_len = ac->ac_o_ex.fe_len; + __entry->goal_logical = ac->ac_g_ex.fe_logical; + __entry->goal_start = ac->ac_g_ex.fe_start; + __entry->goal_group = ac->ac_g_ex.fe_group; + __entry->goal_len = ac->ac_g_ex.fe_len; + __entry->result_logical = ac->ac_f_ex.fe_logical; + __entry->result_start = ac->ac_f_ex.fe_start; + __entry->result_group = ac->ac_f_ex.fe_group; + __entry->result_len = ac->ac_f_ex.fe_len; + ), + + TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " + "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " + "tail %u broken %u", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->orig_group, __entry->orig_start, + __entry->orig_len, __entry->orig_logical, + __entry->goal_group, __entry->goal_start, + __entry->goal_len, __entry->goal_logical, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical, + __entry->found, __entry->groups, __entry->cr, + __entry->flags, __entry->tail, + __entry->buddy ? 1 << __entry->buddy : 0) +); + +TRACE_EVENT(ext4_mballoc_prealloc, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u32, orig_logical ) + __field( int, orig_start ) + __field( __u32, orig_group ) + __field( int, orig_len ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->orig_logical = ac->ac_o_ex.fe_logical; + __entry->orig_start = ac->ac_o_ex.fe_start; + __entry->orig_group = ac->ac_o_ex.fe_group; + __entry->orig_len = ac->ac_o_ex.fe_len; + __entry->result_logical = ac->ac_b_ex.fe_logical; + __entry->result_start = ac->ac_b_ex.fe_start; + __entry->result_group = ac->ac_b_ex.fe_group; + __entry->result_len = ac->ac_b_ex.fe_len; + ), + + TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->orig_group, __entry->orig_start, + __entry->orig_len, __entry->orig_logical, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical) +); + +TRACE_EVENT(ext4_mballoc_discard, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->result_logical = ac->ac_b_ex.fe_logical; + __entry->result_start = ac->ac_b_ex.fe_start; + __entry->result_group = ac->ac_b_ex.fe_group; + __entry->result_len = ac->ac_b_ex.fe_len; + ), + + TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical) +); + +TRACE_EVENT(ext4_mballoc_free, + TP_PROTO(struct ext4_allocation_context *ac), + + TP_ARGS(ac), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u32, result_logical ) + __field( int, result_start ) + __field( __u32, result_group ) + __field( int, result_len ) + ), + + TP_fast_assign( + __entry->dev = ac->ac_inode->i_sb->s_dev; + __entry->ino = ac->ac_inode->i_ino; + __entry->result_logical = ac->ac_b_ex.fe_logical; + __entry->result_start = ac->ac_b_ex.fe_start; + __entry->result_group = ac->ac_b_ex.fe_group; + __entry->result_len = ac->ac_b_ex.fe_len; + ), + + TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->result_group, __entry->result_start, + __entry->result_len, __entry->result_logical) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3 From bf6993276f74d46776f35c45ddef29b981b1d1c6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 30 Sep 2009 00:32:06 -0400 Subject: jbd2: Use tracepoints for history file The /proc/fs/jbd2//history was maintained manually; by using tracepoints, we can get all of the existing functionality of the /proc file plus extra capabilities thanks to the ftrace infrastructure. We save memory as a bonus. Signed-off-by: "Theodore Ts'o" --- include/trace/events/jbd2.h | 78 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include/trace/events') diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index b851f0b4701..3c60b75adb9 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -7,6 +7,9 @@ #include #include +struct transaction_chp_stats_s; +struct transaction_run_stats_s; + TRACE_EVENT(jbd2_checkpoint, TP_PROTO(journal_t *journal, int result), @@ -162,6 +165,81 @@ TRACE_EVENT(jbd2_submit_inode_data, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) ); +TRACE_EVENT(jbd2_run_stats, + TP_PROTO(dev_t dev, unsigned long tid, + struct transaction_run_stats_s *stats), + + TP_ARGS(dev, tid, stats), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned long, tid ) + __field( unsigned long, wait ) + __field( unsigned long, running ) + __field( unsigned long, locked ) + __field( unsigned long, flushing ) + __field( unsigned long, logging ) + __field( __u32, handle_count ) + __field( __u32, blocks ) + __field( __u32, blocks_logged ) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->tid = tid; + __entry->wait = stats->rs_wait; + __entry->running = stats->rs_running; + __entry->locked = stats->rs_locked; + __entry->flushing = stats->rs_flushing; + __entry->logging = stats->rs_logging; + __entry->handle_count = stats->rs_handle_count; + __entry->blocks = stats->rs_blocks; + __entry->blocks_logged = stats->rs_blocks_logged; + ), + + TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " + "logging %u handle_count %u blocks %u blocks_logged %u", + jbd2_dev_to_name(__entry->dev), __entry->tid, + jiffies_to_msecs(__entry->wait), + jiffies_to_msecs(__entry->running), + jiffies_to_msecs(__entry->locked), + jiffies_to_msecs(__entry->flushing), + jiffies_to_msecs(__entry->logging), + __entry->handle_count, __entry->blocks, + __entry->blocks_logged) +); + +TRACE_EVENT(jbd2_checkpoint_stats, + TP_PROTO(dev_t dev, unsigned long tid, + struct transaction_chp_stats_s *stats), + + TP_ARGS(dev, tid, stats), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( unsigned long, tid ) + __field( unsigned long, chp_time ) + __field( __u32, forced_to_close ) + __field( __u32, written ) + __field( __u32, dropped ) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->tid = tid; + __entry->chp_time = stats->cs_chp_time; + __entry->forced_to_close= stats->cs_forced_to_close; + __entry->written = stats->cs_written; + __entry->dropped = stats->cs_dropped; + ), + + TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " + "written %u dropped %u", + jbd2_dev_to_name(__entry->dev), __entry->tid, + jiffies_to_msecs(__entry->chp_time), + __entry->forced_to_close, __entry->written, __entry->dropped) +); + #endif /* _TRACE_JBD2_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 0ef122494020521309be855bfdeeb41f34bf8c94 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 30 Sep 2009 00:51:22 -0400 Subject: ext4: Add a stub for mpage_da_data in the trace header The tracepoint ext4_da_write_pages has a struct mpage_da_data* parameter, but that struct is only defined in fs/ext4/ext4.h. This patch adds a forward declaration for that struct, so this tracepoint header can still be used by tools like SystemTap. This is a continuation of the fix in commit 3661d286. http://sourceware.org/bugzilla/show_bug.cgi?id=10703 Signed-off-by: Josh Stone Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace/events') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b8320256dc5..d09550bf3f9 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -11,6 +11,7 @@ struct ext4_allocation_context; struct ext4_allocation_request; struct ext4_prealloc_space; struct ext4_inode_info; +struct mpage_da_data; #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) -- cgit v1.2.3