diff options
-rw-r--r-- | Documentation/ABI/testing/procfs-diskstats | 22 | ||||
-rw-r--r-- | Documentation/ABI/testing/sysfs-block | 28 | ||||
-rw-r--r-- | Documentation/iostats.txt | 15 | ||||
-rw-r--r-- | block/blk-core.c | 61 | ||||
-rw-r--r-- | block/blk-merge.c | 6 | ||||
-rw-r--r-- | block/genhd.c | 28 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 12 | ||||
-rw-r--r-- | fs/partitions/check.c | 31 | ||||
-rw-r--r-- | include/linux/blkdev.h | 4 | ||||
-rw-r--r-- | include/linux/genhd.h | 153 |
10 files changed, 316 insertions, 44 deletions
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats new file mode 100644 index 00000000000..99233902e09 --- /dev/null +++ b/Documentation/ABI/testing/procfs-diskstats @@ -0,0 +1,22 @@ +What: /proc/diskstats +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /proc/diskstats file displays the I/O statistics + of block devices. Each line contains the following 14 + fields: + 1 - major number + 2 - minor number + 3 - device name + 4 - reads completed successfully + 5 - reads merged + 6 - sectors read + 7 - time spent reading (ms) + 8 - writes completed + 9 - writes merged + 10 - sectors written + 11 - time spent writing (ms) + 12 - I/Os currently in progress + 13 - time spent doing I/Os (ms) + 14 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block new file mode 100644 index 00000000000..4bd9ea53912 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block @@ -0,0 +1,28 @@ +What: /sys/block/<disk>/stat +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /sys/block/<disk>/stat files display the I/O + statistics of disk <disk>. They contain 11 fields: + 1 - reads completed successfully + 2 - reads merged + 3 - sectors read + 4 - time spent reading (ms) + 5 - writes completed + 6 - writes merged + 7 - sectors written + 8 - time spent writing (ms) + 9 - I/Os currently in progress + 10 - time spent doing I/Os (ms) + 11 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt + + +What: /sys/block/<disk>/<part>/stat +Date: February 2008 +Contact: Jerome Marchand <jmarchan@redhat.com> +Description: + The /sys/block/<disk>/<part>/stat files display the + I/O statistics of partition <part>. The format is the + same as the above-written /sys/block/<disk>/stat + format. 
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt index b963c3b4afa..5925c3cd030 100644 --- a/Documentation/iostats.txt +++ b/Documentation/iostats.txt @@ -58,7 +58,7 @@ they should not wrap twice before you notice them. Each set of stats only applies to the indicated device; if you want system-wide stats you'll have to find all the devices and sum them all up. -Field 1 -- # of reads issued +Field 1 -- # of reads completed This is the total number of reads completed successfully. Field 2 -- # of reads merged, field 6 -- # of writes merged Reads and writes which are adjacent to each other may be merged for @@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time of queuing for partitions, and at completion for whole disks. This is a subtle distinction that is probably uninteresting for most cases. +More significant is the error induced by counting the numbers of +reads/writes before merges for partitions and after for disks. Since a +typical workload usually contains a lot of successive and adjacent requests, +the number of reads/writes issued can be several times higher than the +number of reads/writes completed. + +In 2.6.25, the full statistic set is again available for partitions and +disk and partition statistics are consistent again. Since we still don't +keep record of the partition-relative address, an operation is attributed to +the partition which contains the first sector of the request after any +merges. As requests can be merged across partitions, this could lead +to some (probably insignificant) inaccuracy. 
+ Additional notes ---------------- diff --git a/block/blk-core.c b/block/blk-core.c index 4afb39c8233..e9754dc98ec 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io) return; if (!new_io) { - __disk_stat_inc(rq->rq_disk, merges[rw]); + __all_stat_inc(rq->rq_disk, merges[rw], rq->sector); } else { + struct hd_struct *part = get_part(rq->rq_disk, rq->sector); disk_round_stats(rq->rq_disk); rq->rq_disk->in_flight++; + if (part) { + part_round_stats(part); + part->in_flight++; + } } } @@ -102,27 +107,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) } EXPORT_SYMBOL(blk_get_backing_dev_info); +/* + * We can't just memset() the structure, since the allocation path + * already stored some information in the request. + */ void rq_init(struct request_queue *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); - - rq->errors = 0; + rq->q = q; + rq->sector = rq->hard_sector = (sector_t) -1; + rq->nr_sectors = rq->hard_nr_sectors = 0; + rq->current_nr_sectors = rq->hard_cur_sectors = 0; rq->bio = rq->biotail = NULL; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); + rq->rq_disk = NULL; + rq->nr_phys_segments = 0; + rq->nr_hw_segments = 0; rq->ioprio = 0; + rq->special = NULL; rq->buffer = NULL; + rq->tag = -1; + rq->errors = 0; rq->ref_count = 1; - rq->q = q; - rq->special = NULL; + rq->cmd_len = 0; + memset(rq->cmd, 0, sizeof(rq->cmd)); rq->data_len = 0; + rq->sense_len = 0; rq->data = NULL; - rq->nr_phys_segments = 0; rq->sense = NULL; rq->end_io = NULL; rq->end_io_data = NULL; - rq->completion_data = NULL; rq->next_rq = NULL; } @@ -986,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk) } EXPORT_SYMBOL_GPL(disk_round_stats); +void part_round_stats(struct hd_struct *part) +{ + unsigned long now = jiffies; + + if (now == part->stamp) + return; + + if (part->in_flight) { + __part_stat_add(part, time_in_queue, + 
part->in_flight * (now - part->stamp)); + __part_stat_add(part, io_ticks, (now - part->stamp)); + } + part->stamp = now; +} + /* * queue lock must be held */ @@ -1188,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio) if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; - const int rw = bio_data_dir(bio); - - p->sectors[rw] += bio_sectors(bio); - p->ios[rw]++; bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; @@ -1519,7 +1546,8 @@ static int __end_that_request_first(struct request *req, int error, if (blk_fs_request(req) && req->rq_disk) { const int rw = rq_data_dir(req); - disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); + all_stat_add(req->rq_disk, sectors[rw], + nr_bytes >> 9, req->sector); } total_bytes = bio_nbytes = 0; @@ -1704,11 +1732,16 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); + struct hd_struct *part = get_part(disk, req->sector); - __disk_stat_inc(disk, ios[rw]); - __disk_stat_add(disk, ticks[rw], duration); + __all_stat_inc(disk, ios[rw], req->sector); + __all_stat_add(disk, ticks[rw], duration, req->sector); disk_round_stats(disk); disk->in_flight--; + if (part) { + part_round_stats(part); + part->in_flight--; + } } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index 845ef813110..d3b84bbb776 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { + struct hd_struct *part + = get_part(req->rq_disk, req->sector); disk_round_stats(req->rq_disk); req->rq_disk->in_flight--; + if (part) { + part_round_stats(part); + part->in_flight--; + } } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/block/genhd.c b/block/genhd.c index 
de2ebb2fab4..53f2238e69c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -584,12 +584,28 @@ static int diskstats_show(struct seq_file *s, void *v) for (n = 0; n < gp->minors - 1; n++) { struct hd_struct *hd = gp->part[n]; - if (hd && hd->nr_sects) - seq_printf(s, "%4d %4d %s %u %u %u %u\n", - gp->major, n + gp->first_minor + 1, - disk_name(gp, n + 1, buf), - hd->ios[0], hd->sectors[0], - hd->ios[1], hd->sectors[1]); + if (!hd || !hd->nr_sects) + continue; + + preempt_disable(); + part_round_stats(hd); + preempt_enable(); + seq_printf(s, "%4d %4d %s %lu %lu %llu " + "%u %lu %lu %llu %u %u %u %u\n", + gp->major, n + gp->first_minor + 1, + disk_name(gp, n + 1, buf), + part_stat_read(hd, ios[0]), + part_stat_read(hd, merges[0]), + (unsigned long long)part_stat_read(hd, sectors[0]), + jiffies_to_msecs(part_stat_read(hd, ticks[0])), + part_stat_read(hd, ios[1]), + part_stat_read(hd, merges[1]), + (unsigned long long)part_stat_read(hd, sectors[1]), + jiffies_to_msecs(part_stat_read(hd, ticks[1])), + hd->in_flight, + jiffies_to_msecs(part_stat_read(hd, io_ticks)), + jiffies_to_msecs(part_stat_read(hd, time_in_queue)) + ); } return 0; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 44beb17e809..d00293ba3b4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -751,15 +751,15 @@ gettgt(struct aoedev *d, char *addr) } static inline void -diskstats(struct gendisk *disk, struct bio *bio, ulong duration) +diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector) { unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); - disk_stat_inc(disk, ios[rw]); - disk_stat_add(disk, ticks[rw], duration); - disk_stat_add(disk, sectors[rw], n_sect); - disk_stat_add(disk, io_ticks, duration); + all_stat_inc(disk, ios[rw], sector); + all_stat_add(disk, ticks[rw], duration, sector); + all_stat_add(disk, sectors[rw], n_sect, sector); + all_stat_add(disk, io_ticks, duration, sector); } void @@ -879,7 
+879,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) } if (buf && --buf->nframesout == 0 && buf->resid == 0) { - diskstats(d->gd, buf->bio, jiffies - buf->stime); + diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector); n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; bio_endio(buf->bio, n); mempool_free(buf, d->bufpool); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 9a64045ff84..03f808c5b79 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/kmod.h> #include <linux/ctype.h> +#include <linux/genhd.h> #include "check.h" @@ -215,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%8u %8llu %8u %8llu\n", - p->ios[0], (unsigned long long)p->sectors[0], - p->ios[1], (unsigned long long)p->sectors[1]); + preempt_disable(); + part_round_stats(p); + preempt_enable(); + return sprintf(buf, + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8u %8u %8u" + "\n", + part_stat_read(p, ios[READ]), + part_stat_read(p, merges[READ]), + (unsigned long long)part_stat_read(p, sectors[READ]), + jiffies_to_msecs(part_stat_read(p, ticks[READ])), + part_stat_read(p, ios[WRITE]), + part_stat_read(p, merges[WRITE]), + (unsigned long long)part_stat_read(p, sectors[WRITE]), + jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), + p->in_flight, + jiffies_to_msecs(part_stat_read(p, io_ticks)), + jiffies_to_msecs(part_stat_read(p, time_in_queue))); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -273,6 +290,7 @@ static struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + free_part_stats(p); kfree(p); } @@ -312,8 +330,7 @@ void delete_partition(struct gendisk *disk, int part) disk->part[part-1] = NULL; p->start_sect = 0; p->nr_sects = 0; - p->ios[0] = p->ios[1] = 0; - p->sectors[0] = p->sectors[1] = 0; + part_stat_set_all(p, 0); kobject_put(p->holder_dir); 
device_del(&p->dev); put_device(&p->dev); @@ -336,6 +353,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, if (!p) return; + if (!init_part_stats(p)) { + kfree(p); + return; + } p->start_sect = start; p->nr_sects = len; p->partno = part; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90392a9d7a9..e1888cc5b8a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -137,7 +137,9 @@ enum rq_flag_bits { #define BLK_MAX_CDB 16 /* - * try to put the fields that are referenced together in the same cacheline + * try to put the fields that are referenced together in the same cacheline. + * if you modify this structure, be sure to check block/blk-core.c:rq_init() + * as well! */ struct request { struct list_head queuelist; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1dbea0ac569..09a3b18918c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -91,16 +91,31 @@ struct partition { __le32 nr_sects; /* nr of sectors in partition */ } __attribute__((packed)); +struct disk_stats { + unsigned long sectors[2]; /* READs and WRITEs */ + unsigned long ios[2]; + unsigned long merges[2]; + unsigned long ticks[2]; + unsigned long io_ticks; + unsigned long time_in_queue; +}; + struct hd_struct { sector_t start_sect; sector_t nr_sects; struct device dev; struct kobject *holder_dir; - unsigned ios[2], sectors[2]; /* READs and WRITEs */ int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif + unsigned long stamp; + int in_flight; +#ifdef CONFIG_SMP + struct disk_stats *dkstats; +#else + struct disk_stats dkstats; +#endif }; #define GENHD_FL_REMOVABLE 1 @@ -111,15 +126,7 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_FAIL 64 -struct disk_stats { - unsigned long sectors[2]; /* READs and WRITEs */ - unsigned long ios[2]; - unsigned long merges[2]; - unsigned long ticks[2]; - unsigned long io_ticks; - unsigned long time_in_queue; -}; - + 
struct gendisk { int major; /* major number of driver */ int first_minor; @@ -158,6 +165,20 @@ struct gendisk { * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ +static inline struct hd_struct *get_part(struct gendisk *gendiskp, + sector_t sector) +{ + struct hd_struct *part; + int i; + for (i = 0; i < gendiskp->minors - 1; i++) { + part = gendiskp->part[i]; + if (part && part->start_sect <= sector + && sector < part->start_sect + part->nr_sects) + return part; + } + return NULL; +} + #ifdef CONFIG_SMP #define __disk_stat_add(gendiskp, field, addnd) \ (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) @@ -177,15 +198,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { memset(per_cpu_ptr(gendiskp->dkstats, i), value, sizeof (struct disk_stats)); } + +#define __part_stat_add(part, field, addnd) \ + (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) + +#define __all_stat_add(gendiskp, field, addnd, sector) \ +({ \ + struct hd_struct *part = get_part(gendiskp, sector); \ + if (part) \ + __part_stat_add(part, field, addnd); \ + __disk_stat_add(gendiskp, field, addnd); \ +}) + +#define part_stat_read(part, field) \ +({ \ + typeof(part->dkstats->field) res = 0; \ + int i; \ + for_each_possible_cpu(i) \ + res += per_cpu_ptr(part->dkstats, i)->field; \ + res; \ +}) + +static inline void part_stat_set_all(struct hd_struct *part, int value) { + int i; + for_each_possible_cpu(i) + memset(per_cpu_ptr(part->dkstats, i), value, + sizeof(struct disk_stats)); +} #else #define __disk_stat_add(gendiskp, field, addnd) \ (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); } + +#define 
__part_stat_add(part, field, addnd) \ + (part->dkstats.field += addnd) + +#define __all_stat_add(gendiskp, field, addnd, sector) \ +({ \ + struct hd_struct *part = get_part(gendiskp, sector); \ + if (part) \ + part->dkstats.field += addnd; \ + __disk_stat_add(gendiskp, field, addnd); \ +}) + +#define part_stat_read(part, field) (part->dkstats.field) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + memset(&part->dkstats, value, sizeof(struct disk_stats)); +} + #endif #define disk_stat_add(gendiskp, field, addnd) \ @@ -206,6 +274,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { #define disk_stat_sub(gendiskp, field, subnd) \ disk_stat_add(gendiskp, field, -subnd) +#define part_stat_add(gendiskp, field, addnd) \ + do { \ + preempt_disable(); \ + __part_stat_add(gendiskp, field, addnd);\ + preempt_enable(); \ + } while (0) + +#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) +#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) + +#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) +#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) + +#define __part_stat_sub(gendiskp, field, subnd) \ + __part_stat_add(gendiskp, field, -subnd) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define all_stat_add(gendiskp, field, addnd, sector) \ + do { \ + preempt_disable(); \ + __all_stat_add(gendiskp, field, addnd, sector); \ + preempt_enable(); \ + } while (0) + +#define __all_stat_dec(gendiskp, field, sector) \ + __all_stat_add(gendiskp, field, -1, sector) +#define all_stat_dec(gendiskp, field, sector) \ + all_stat_add(gendiskp, field, -1, sector) + +#define __all_stat_inc(gendiskp, field, sector) \ + __all_stat_add(gendiskp, field, 1, sector) +#define all_stat_inc(gendiskp, field, sector) \ + all_stat_add(gendiskp, field, 1, sector) + +#define __all_stat_sub(gendiskp, field, 
subnd, sector) \ + __all_stat_add(gendiskp, field, -subnd, sector) +#define all_stat_sub(gendiskp, field, subnd, sector) \ + all_stat_add(gendiskp, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -221,6 +328,20 @@ static inline void free_disk_stats(struct gendisk *disk) { free_percpu(disk->dkstats); } + +static inline int init_part_stats(struct hd_struct *part) +{ + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ + free_percpu(part->dkstats); +} + #else /* CONFIG_SMP */ static inline int init_disk_stats(struct gendisk *disk) { @@ -230,10 +351,20 @@ static inline int init_disk_stats(struct gendisk *disk) static inline void free_disk_stats(struct gendisk *disk) { } + +static inline int init_part_stats(struct hd_struct *part) +{ + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ +} #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ extern void disk_round_stats(struct gendisk *disk); +extern void part_round_stats(struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); |