aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/procfs-diskstats22
-rw-r--r--Documentation/ABI/testing/sysfs-block28
-rw-r--r--Documentation/iostats.txt15
-rw-r--r--block/blk-core.c61
-rw-r--r--block/blk-merge.c6
-rw-r--r--block/genhd.c28
-rw-r--r--drivers/block/aoe/aoecmd.c12
-rw-r--r--fs/partitions/check.c31
-rw-r--r--include/linux/blkdev.h4
-rw-r--r--include/linux/genhd.h153
10 files changed, 316 insertions, 44 deletions
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
new file mode 100644
index 00000000000..99233902e09
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -0,0 +1,22 @@
+What: /proc/diskstats
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /proc/diskstats file displays the I/O statistics
+ of block devices. Each line contains the following 14
+ fields:
+ 1 - major number
+ 2 - minor number
+ 3 - device name
+ 4 - reads completed successfully
+ 5 - reads merged
+ 6 - sectors read
+ 7 - time spent reading (ms)
+ 8 - writes completed
+ 9 - writes merged
+ 10 - sectors written
+ 11 - time spent writing (ms)
+ 12 - I/Os currently in progress
+ 13 - time spent doing I/Os (ms)
+ 14 - weighted time spent doing I/Os (ms)
+ For more details refer to Documentation/iostats.txt
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
new file mode 100644
index 00000000000..4bd9ea53912
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block
@@ -0,0 +1,28 @@
+What: /sys/block/<disk>/stat
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /sys/block/<disk>/stat files display the I/O
+ statistics of disk <disk>. They contain 11 fields:
+ 1 - reads completed successfully
+ 2 - reads merged
+ 3 - sectors read
+ 4 - time spent reading (ms)
+ 5 - writes completed
+ 6 - writes merged
+ 7 - sectors written
+ 8 - time spent writing (ms)
+ 9 - I/Os currently in progress
+ 10 - time spent doing I/Os (ms)
+ 11 - weighted time spent doing I/Os (ms)
+ For more details refer to Documentation/iostats.txt
+
+
+What: /sys/block/<disk>/<part>/stat
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /sys/block/<disk>/<part>/stat files display the
+ I/O statistics of partition <part>. The format is the
+ same as the above-written /sys/block/<disk>/stat
+ format.
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index b963c3b4afa..5925c3cd030 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -58,7 +58,7 @@ they should not wrap twice before you notice them.
Each set of stats only applies to the indicated device; if you want
system-wide stats you'll have to find all the devices and sum them all up.
-Field 1 -- # of reads issued
+Field 1 -- # of reads completed
This is the total number of reads completed successfully.
Field 2 -- # of reads merged, field 6 -- # of writes merged
Reads and writes which are adjacent to each other may be merged for
@@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time
of queuing for partitions, and at completion for whole disks. This is
a subtle distinction that is probably uninteresting for most cases.
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partitions, this could lead
+to some (probably insignificant) inaccuracy.
+
Additional notes
----------------
diff --git a/block/blk-core.c b/block/blk-core.c
index 4afb39c8233..e9754dc98ec 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io)
return;
if (!new_io) {
- __disk_stat_inc(rq->rq_disk, merges[rw]);
+ __all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
} else {
+ struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
disk_round_stats(rq->rq_disk);
rq->rq_disk->in_flight++;
+ if (part) {
+ part_round_stats(part);
+ part->in_flight++;
+ }
}
}
@@ -102,27 +107,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
}
EXPORT_SYMBOL(blk_get_backing_dev_info);
+/*
+ * We can't just memset() the structure, since the allocation path
+ * already stored some information in the request.
+ */
void rq_init(struct request_queue *q, struct request *rq)
{
INIT_LIST_HEAD(&rq->queuelist);
INIT_LIST_HEAD(&rq->donelist);
-
- rq->errors = 0;
+ rq->q = q;
+ rq->sector = rq->hard_sector = (sector_t) -1;
+ rq->nr_sectors = rq->hard_nr_sectors = 0;
+ rq->current_nr_sectors = rq->hard_cur_sectors = 0;
rq->bio = rq->biotail = NULL;
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
+ rq->rq_disk = NULL;
+ rq->nr_phys_segments = 0;
+ rq->nr_hw_segments = 0;
rq->ioprio = 0;
+ rq->special = NULL;
rq->buffer = NULL;
+ rq->tag = -1;
+ rq->errors = 0;
rq->ref_count = 1;
- rq->q = q;
- rq->special = NULL;
+ rq->cmd_len = 0;
+ memset(rq->cmd, 0, sizeof(rq->cmd));
rq->data_len = 0;
+ rq->sense_len = 0;
rq->data = NULL;
- rq->nr_phys_segments = 0;
rq->sense = NULL;
rq->end_io = NULL;
rq->end_io_data = NULL;
- rq->completion_data = NULL;
rq->next_rq = NULL;
}
@@ -986,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk)
}
EXPORT_SYMBOL_GPL(disk_round_stats);
+void part_round_stats(struct hd_struct *part)
+{
+ unsigned long now = jiffies;
+
+ if (now == part->stamp)
+ return;
+
+ if (part->in_flight) {
+ __part_stat_add(part, time_in_queue,
+ part->in_flight * (now - part->stamp));
+ __part_stat_add(part, io_ticks, (now - part->stamp));
+ }
+ part->stamp = now;
+}
+
/*
* queue lock must be held
*/
@@ -1188,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio)
if (bio_sectors(bio) && bdev != bdev->bd_contains) {
struct hd_struct *p = bdev->bd_part;
- const int rw = bio_data_dir(bio);
-
- p->sectors[rw] += bio_sectors(bio);
- p->ios[rw]++;
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
@@ -1519,7 +1546,8 @@ static int __end_that_request_first(struct request *req, int error,
if (blk_fs_request(req) && req->rq_disk) {
const int rw = rq_data_dir(req);
- disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
+ all_stat_add(req->rq_disk, sectors[rw],
+ nr_bytes >> 9, req->sector);
}
total_bytes = bio_nbytes = 0;
@@ -1704,11 +1732,16 @@ static void end_that_request_last(struct request *req, int error)
if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
+ struct hd_struct *part = get_part(disk, req->sector);
- __disk_stat_inc(disk, ios[rw]);
- __disk_stat_add(disk, ticks[rw], duration);
+ __all_stat_inc(disk, ios[rw], req->sector);
+ __all_stat_add(disk, ticks[rw], duration, req->sector);
disk_round_stats(disk);
disk->in_flight--;
+ if (part) {
+ part_round_stats(part);
+ part->in_flight--;
+ }
}
if (req->end_io)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 845ef813110..d3b84bbb776 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
elv_merge_requests(q, req, next);
if (req->rq_disk) {
+ struct hd_struct *part
+ = get_part(req->rq_disk, req->sector);
disk_round_stats(req->rq_disk);
req->rq_disk->in_flight--;
+ if (part) {
+ part_round_stats(part);
+ part->in_flight--;
+ }
}
req->ioprio = ioprio_best(req->ioprio, next->ioprio);
diff --git a/block/genhd.c b/block/genhd.c
index de2ebb2fab4..53f2238e69c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -584,12 +584,28 @@ static int diskstats_show(struct seq_file *s, void *v)
for (n = 0; n < gp->minors - 1; n++) {
struct hd_struct *hd = gp->part[n];
- if (hd && hd->nr_sects)
- seq_printf(s, "%4d %4d %s %u %u %u %u\n",
- gp->major, n + gp->first_minor + 1,
- disk_name(gp, n + 1, buf),
- hd->ios[0], hd->sectors[0],
- hd->ios[1], hd->sectors[1]);
+ if (!hd || !hd->nr_sects)
+ continue;
+
+ preempt_disable();
+ part_round_stats(hd);
+ preempt_enable();
+ seq_printf(s, "%4d %4d %s %lu %lu %llu "
+ "%u %lu %lu %llu %u %u %u %u\n",
+ gp->major, n + gp->first_minor + 1,
+ disk_name(gp, n + 1, buf),
+ part_stat_read(hd, ios[0]),
+ part_stat_read(hd, merges[0]),
+ (unsigned long long)part_stat_read(hd, sectors[0]),
+ jiffies_to_msecs(part_stat_read(hd, ticks[0])),
+ part_stat_read(hd, ios[1]),
+ part_stat_read(hd, merges[1]),
+ (unsigned long long)part_stat_read(hd, sectors[1]),
+ jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+ hd->in_flight,
+ jiffies_to_msecs(part_stat_read(hd, io_ticks)),
+ jiffies_to_msecs(part_stat_read(hd, time_in_queue))
+ );
}
return 0;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 44beb17e809..d00293ba3b4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -751,15 +751,15 @@ gettgt(struct aoedev *d, char *addr)
}
static inline void
-diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
{
unsigned long n_sect = bio->bi_size >> 9;
const int rw = bio_data_dir(bio);
- disk_stat_inc(disk, ios[rw]);
- disk_stat_add(disk, ticks[rw], duration);
- disk_stat_add(disk, sectors[rw], n_sect);
- disk_stat_add(disk, io_ticks, duration);
+ all_stat_inc(disk, ios[rw], sector);
+ all_stat_add(disk, ticks[rw], duration, sector);
+ all_stat_add(disk, sectors[rw], n_sect, sector);
+ all_stat_add(disk, io_ticks, duration, sector);
}
void
@@ -879,7 +879,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
}
if (buf && --buf->nframesout == 0 && buf->resid == 0) {
- diskstats(d->gd, buf->bio, jiffies - buf->stime);
+ diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
bio_endio(buf->bio, n);
mempool_free(buf, d->bufpool);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a64045ff84..03f808c5b79 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/ctype.h>
+#include <linux/genhd.h>
#include "check.h"
@@ -215,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev,
{
struct hd_struct *p = dev_to_part(dev);
- return sprintf(buf, "%8u %8llu %8u %8llu\n",
- p->ios[0], (unsigned long long)p->sectors[0],
- p->ios[1], (unsigned long long)p->sectors[1]);
+ preempt_disable();
+ part_round_stats(p);
+ preempt_enable();
+ return sprintf(buf,
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8lu %8llu %8u "
+ "%8u %8u %8u"
+ "\n",
+ part_stat_read(p, ios[READ]),
+ part_stat_read(p, merges[READ]),
+ (unsigned long long)part_stat_read(p, sectors[READ]),
+ jiffies_to_msecs(part_stat_read(p, ticks[READ])),
+ part_stat_read(p, ios[WRITE]),
+ part_stat_read(p, merges[WRITE]),
+ (unsigned long long)part_stat_read(p, sectors[WRITE]),
+ jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
+ p->in_flight,
+ jiffies_to_msecs(part_stat_read(p, io_ticks)),
+ jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -273,6 +290,7 @@ static struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
struct hd_struct *p = dev_to_part(dev);
+ free_part_stats(p);
kfree(p);
}
@@ -312,8 +330,7 @@ void delete_partition(struct gendisk *disk, int part)
disk->part[part-1] = NULL;
p->start_sect = 0;
p->nr_sects = 0;
- p->ios[0] = p->ios[1] = 0;
- p->sectors[0] = p->sectors[1] = 0;
+ part_stat_set_all(p, 0);
kobject_put(p->holder_dir);
device_del(&p->dev);
put_device(&p->dev);
@@ -336,6 +353,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
if (!p)
return;
+ if (!init_part_stats(p)) {
+ kfree(p);
+ return;
+ }
p->start_sect = start;
p->nr_sects = len;
p->partno = part;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 90392a9d7a9..e1888cc5b8a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -137,7 +137,9 @@ enum rq_flag_bits {
#define BLK_MAX_CDB 16
/*
- * try to put the fields that are referenced together in the same cacheline
+ * try to put the fields that are referenced together in the same cacheline.
+ * if you modify this structure, be sure to check block/blk-core.c:rq_init()
+ * as well!
*/
struct request {
struct list_head queuelist;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 1dbea0ac569..09a3b18918c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -91,16 +91,31 @@ struct partition {
__le32 nr_sects; /* nr of sectors in partition */
} __attribute__((packed));
+struct disk_stats {
+ unsigned long sectors[2]; /* READs and WRITEs */
+ unsigned long ios[2];
+ unsigned long merges[2];
+ unsigned long ticks[2];
+ unsigned long io_ticks;
+ unsigned long time_in_queue;
+};
+
struct hd_struct {
sector_t start_sect;
sector_t nr_sects;
struct device dev;
struct kobject *holder_dir;
- unsigned ios[2], sectors[2]; /* READs and WRITEs */
int policy, partno;
#ifdef CONFIG_FAIL_MAKE_REQUEST
int make_it_fail;
#endif
+ unsigned long stamp;
+ int in_flight;
+#ifdef CONFIG_SMP
+ struct disk_stats *dkstats;
+#else
+ struct disk_stats dkstats;
+#endif
};
#define GENHD_FL_REMOVABLE 1
@@ -111,15 +126,7 @@ struct hd_struct {
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
#define GENHD_FL_FAIL 64
-struct disk_stats {
- unsigned long sectors[2]; /* READs and WRITEs */
- unsigned long ios[2];
- unsigned long merges[2];
- unsigned long ticks[2];
- unsigned long io_ticks;
- unsigned long time_in_queue;
-};
-
+
struct gendisk {
int major; /* major number of driver */
int first_minor;
@@ -158,6 +165,20 @@ struct gendisk {
* The __ variants should only be called in critical sections. The full
* variants disable/enable preemption.
*/
+static inline struct hd_struct *get_part(struct gendisk *gendiskp,
+ sector_t sector)
+{
+ struct hd_struct *part;
+ int i;
+ for (i = 0; i < gendiskp->minors - 1; i++) {
+ part = gendiskp->part[i];
+ if (part && part->start_sect <= sector
+ && sector < part->start_sect + part->nr_sects)
+ return part;
+ }
+ return NULL;
+}
+
#ifdef CONFIG_SMP
#define __disk_stat_add(gendiskp, field, addnd) \
(per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
@@ -177,15 +198,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
memset(per_cpu_ptr(gendiskp->dkstats, i), value,
sizeof (struct disk_stats));
}
+
+#define __part_stat_add(part, field, addnd) \
+ (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector) \
+({ \
+ struct hd_struct *part = get_part(gendiskp, sector); \
+ if (part) \
+ __part_stat_add(part, field, addnd); \
+ __disk_stat_add(gendiskp, field, addnd); \
+})
+
+#define part_stat_read(part, field) \
+({ \
+ typeof(part->dkstats->field) res = 0; \
+ int i; \
+ for_each_possible_cpu(i) \
+ res += per_cpu_ptr(part->dkstats, i)->field; \
+ res; \
+})
+
+static inline void part_stat_set_all(struct hd_struct *part, int value) {
+ int i;
+ for_each_possible_cpu(i)
+ memset(per_cpu_ptr(part->dkstats, i), value,
+ sizeof(struct disk_stats));
+}
#else
#define __disk_stat_add(gendiskp, field, addnd) \
(gendiskp->dkstats.field += addnd)
#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field)
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
+static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
+{
memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
}
+
+#define __part_stat_add(part, field, addnd) \
+ (part->dkstats.field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector) \
+({ \
+ struct hd_struct *part = get_part(gendiskp, sector); \
+ if (part) \
+ part->dkstats.field += addnd; \
+ __disk_stat_add(gendiskp, field, addnd); \
+})
+
+#define part_stat_read(part, field) (part->dkstats.field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+ memset(&part->dkstats, value, sizeof(struct disk_stats));
+}
+
#endif
#define disk_stat_add(gendiskp, field, addnd) \
@@ -206,6 +274,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) {
#define disk_stat_sub(gendiskp, field, subnd) \
disk_stat_add(gendiskp, field, -subnd)
+#define part_stat_add(gendiskp, field, addnd) \
+ do { \
+ preempt_disable(); \
+ __part_stat_add(gendiskp, field, addnd);\
+ preempt_enable(); \
+ } while (0)
+
+#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
+#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
+
+#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
+#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
+
+#define __part_stat_sub(gendiskp, field, subnd) \
+ __part_stat_add(gendiskp, field, -subnd)
+#define part_stat_sub(gendiskp, field, subnd) \
+ part_stat_add(gendiskp, field, -subnd)
+
+#define all_stat_add(gendiskp, field, addnd, sector) \
+ do { \
+ preempt_disable(); \
+ __all_stat_add(gendiskp, field, addnd, sector); \
+ preempt_enable(); \
+ } while (0)
+
+#define __all_stat_dec(gendiskp, field, sector) \
+ __all_stat_add(gendiskp, field, -1, sector)
+#define all_stat_dec(gendiskp, field, sector) \
+ all_stat_add(gendiskp, field, -1, sector)
+
+#define __all_stat_inc(gendiskp, field, sector) \
+ __all_stat_add(gendiskp, field, 1, sector)
+#define all_stat_inc(gendiskp, field, sector) \
+ all_stat_add(gendiskp, field, 1, sector)
+
+#define __all_stat_sub(gendiskp, field, subnd, sector) \
+ __all_stat_add(gendiskp, field, -subnd, sector)
+#define all_stat_sub(gendiskp, field, subnd, sector) \
+ all_stat_add(gendiskp, field, -subnd, sector)
/* Inlines to alloc and free disk stats in struct gendisk */
#ifdef CONFIG_SMP
@@ -221,6 +328,20 @@ static inline void free_disk_stats(struct gendisk *disk)
{
free_percpu(disk->dkstats);
}
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+ part->dkstats = alloc_percpu(struct disk_stats);
+ if (!part->dkstats)
+ return 0;
+ return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+ free_percpu(part->dkstats);
+}
+
#else /* CONFIG_SMP */
static inline int init_disk_stats(struct gendisk *disk)
{
@@ -230,10 +351,20 @@ static inline int init_disk_stats(struct gendisk *disk)
static inline void free_disk_stats(struct gendisk *disk)
{
}
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+ return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+}
#endif /* CONFIG_SMP */
/* drivers/block/ll_rw_blk.c */
extern void disk_round_stats(struct gendisk *disk);
+extern void part_round_stats(struct hd_struct *part);
/* drivers/block/genhd.c */
extern int get_blkdev_list(char *, int);