From a82afdfcb8c0df09776b6458af6b68fc58b2e87b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 17:48:16 +0900 Subject: block: use the same failfast bits for bio and request bio and request use the same set of failfast bits. This patch makes the following changes to simplify things. * enumify BIO_RW* bits and reorder bits such that BIOS_RW_FAILFAST_* bits coincide with __REQ_FAILFAST_* bits. * The above pushes BIO_RW_AHEAD out of sync with __REQ_FAILFAST_DEV but the matching is useless anyway. init_request_from_bio() is responsible for setting FAILFAST bits on FS requests and non-FS requests never use BIO_RW_AHEAD. Drop the code and comment from blk_rq_bio_prep(). * Define REQ_FAILFAST_MASK which is OR of all FAILFAST bits and simplify FAILFAST flags handling in init_request_from_bio(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 43 +++++++++++++++++++++++-------------------- include/linux/blkdev.h | 4 ++++ 2 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2892b710771..a299ed38fcd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -142,37 +142,40 @@ struct bio { * * bit 0 -- data direction * If not set, bio is a read from device. If set, it's a write to device. - * bit 1 -- rw-ahead when set - * bit 2 -- barrier + * bit 1 -- fail fast device errors + * bit 2 -- fail fast transport errors + * bit 3 -- fail fast driver errors + * bit 4 -- rw-ahead when set + * bit 5 -- barrier * Insert a serialization point in the IO queue, forcing previously * submitted IO to be completed before this one is issued. - * bit 3 -- synchronous I/O hint. - * bit 4 -- Unplug the device immediately after submitting this bio. - * bit 5 -- metadata request + * bit 6 -- synchronous I/O hint. + * bit 7 -- Unplug the device immediately after submitting this bio. + * bit 8 -- metadata request * Used for tracing to differentiate metadata and data IO. May also * get some preferential treatment in the IO scheduler - * bit 6 -- discard sectors + * bit 9 -- discard sectors * Informs the lower level device that this range of sectors is no longer * used by the file system and may thus be freed by the device. Used * for flash based storage. - * bit 7 -- fail fast device errors - * bit 8 -- fail fast transport errors - * bit 9 -- fail fast driver errors * Don't want driver retries for any fast fail whatever the reason. * bit 10 -- Tell the IO scheduler not to wait for more requests after this one has been submitted, even if it is a SYNC request. */ -#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ -#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ -#define BIO_RW_BARRIER 2 -#define BIO_RW_SYNCIO 3 -#define BIO_RW_UNPLUG 4 -#define BIO_RW_META 5 -#define BIO_RW_DISCARD 6 -#define BIO_RW_FAILFAST_DEV 7 -#define BIO_RW_FAILFAST_TRANSPORT 8 -#define BIO_RW_FAILFAST_DRIVER 9 -#define BIO_RW_NOIDLE 10 +enum bio_rw_flags { + BIO_RW, + BIO_RW_FAILFAST_DEV, + BIO_RW_FAILFAST_TRANSPORT, + BIO_RW_FAILFAST_DRIVER, + /* above flags must match REQ_* */ + BIO_RW_AHEAD, + BIO_RW_BARRIER, + BIO_RW_SYNCIO, + BIO_RW_UNPLUG, + BIO_RW_META, + BIO_RW_DISCARD, + BIO_RW_NOIDLE, +}; #define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69103e053c9..c3015736d81 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -93,6 +93,7 @@ enum rq_flag_bits { __REQ_FAILFAST_DEV, /* no driver retries of device errors */ __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + /* above flags must match BIO_RW_* */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -143,6 +144,9 @@ enum rq_flag_bits { #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ + REQ_FAILFAST_DRIVER) + #define BLK_MAX_CDB 16 /* -- cgit v1.2.3 From 80a761fd33cf812f771e212139157bf8f58d4b3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 17:48:17 +0900 Subject: block: implement mixed merge of different failfast requests Failfast has characteristics from other attributes. When issuing, executing and successuflly completing requests, failfast doesn't make any difference. It only affects how a request is handled on failure. Allowing requests with different failfast settings to be merged cause normal IOs to fail prematurely while not allowing has performance penalties as failfast is used for read aheads which are likely to be located near in-flight or to-be-issued normal IOs. This patch introduces the concept of 'mixed merge'. A request is a mixed merge if it is merge of segments which require different handling on failure. Currently the only mixable attributes are failfast ones (or lack thereof). When a bio with different failfast settings is added to an existing request or requests of different failfast settings are merged, the merged request is marked mixed. Each bio carries failfast settings and the request always tracks failfast state of the first bio. When the request fails, blk_rq_err_bytes() can be used to determine how many bytes can be safely failed without crossing into an area which requires further retrials. This allows request merging regardless of failfast settings while keeping the failure handling correct. This patch only implements mixed merge but doesn't enable it. The next one will update SCSI to make use of mixed merge. Signed-off-by: Tejun Heo Cc: Niel Lambrechts Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c3015736d81..650b6a9cb67 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,6 +115,7 @@ enum rq_flag_bits { __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_NR_BITS, /* stops here */ }; @@ -143,6 +144,7 @@ enum rq_flag_bits { #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ REQ_FAILFAST_DRIVER) @@ -832,11 +834,13 @@ static inline void blk_run_address_space(struct address_space *mapping) } /* - * blk_rq_pos() : the current sector - * blk_rq_bytes() : bytes left in the entire request - * blk_rq_cur_bytes() : bytes left in the current segment - * blk_rq_sectors() : sectors left in the entire request - * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_err_bytes() : bytes left till the next error boundary + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -853,6 +857,8 @@ static inline int blk_rq_cur_bytes(const struct request *rq) return rq->bio ? bio_cur_bytes(rq->bio) : 0; } +extern unsigned int blk_rq_err_bytes(const struct request *rq); + static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> 9; @@ -863,6 +869,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +static inline unsigned int blk_rq_err_sectors(const struct request *rq) +{ + return blk_rq_err_bytes(rq) >> 9; +} + /* * Request issue related functions. */ @@ -889,10 +900,12 @@ extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); extern bool blk_end_request_cur(struct request *rq, int error); +extern bool blk_end_request_err(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, int error); extern bool __blk_end_request_cur(struct request *rq, int error); +extern bool __blk_end_request_err(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); -- cgit v1.2.3 From e7e503aedb1f4d165081cb8d47a58c38f80f0cb4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Jul 2009 10:15:30 +0200 Subject: block: make bio_rw_flagged() return a bool Makes for a saner interface, instead of returning the bit position. Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a299ed38fcd..4f8fd0221cd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -177,7 +177,10 @@ enum bio_rw_flags { BIO_RW_NOIDLE, }; -#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) +static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) +{ + return (bio->bi_rw & (1 << flag)) != 0; +} /* * Old defines, these should eventually be replaced by direct usage of -- cgit v1.2.3 From 1f98a13f623e0ef666690a18c1250335fc6d7ef1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Sep 2009 14:32:04 +0200 Subject: bio: first step in sanitizing the bio->bi_rw flag testing Get rid of any functions that test for these bits and make callers use bio_rw_flagged() directly. Then it is at least directly apparent what variable and flag they check. Signed-off-by: Jens Axboe --- include/linux/bio.h | 25 +++++++------------------ include/linux/blkdev.h | 2 +- 2 files changed, 8 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4f8fd0221cd..5be93f18d84 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -177,28 +177,17 @@ enum bio_rw_flags { BIO_RW_NOIDLE, }; +/* + * First four bits must match between bio->bi_rw and rq->cmd_flags, make + * that explicit here. + */ +#define BIO_RW_RQ_MASK 0xf + static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) { return (bio->bi_rw & (1 << flag)) != 0; } -/* - * Old defines, these should eventually be replaced by direct usage of - * bio_rw_flagged() - */ -#define bio_barrier(bio) bio_rw_flagged(bio, BIO_RW_BARRIER) -#define bio_sync(bio) bio_rw_flagged(bio, BIO_RW_SYNCIO) -#define bio_unplug(bio) bio_rw_flagged(bio, BIO_RW_UNPLUG) -#define bio_failfast_dev(bio) bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV) -#define bio_failfast_transport(bio) \ - bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT) -#define bio_failfast_driver(bio) \ - bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER) -#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) -#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) -#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) -#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE) - /* * upper 16 bits of bi_rw define the io priority of this bio */ @@ -222,7 +211,7 @@ static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) +#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD)) static inline unsigned int bio_cur_bytes(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 650b6a9cb67..88edb62def8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -86,7 +86,7 @@ enum { }; /* - * request type modified bits. first two bits match BIO_RW* bits, important + * request type modified bits. first four bits match BIO_RW* bits, important */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ -- cgit v1.2.3 From fb1e75389bd06fd5987e9cda1b4e0305c782f854 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Jul 2009 08:18:24 +0200 Subject: block: improve queue_should_plug() by looking at IO depths Instead of just checking whether this device uses block layer tagging, we can improve the detection by looking at the maximum queue depth it has reached. If that crosses 4, then deem it a queuing device. This is important on high IOPS devices, since plugging hurts the performance there (it can be as much as 10-15% of the sys time). Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88edb62def8..98b45633a27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -459,6 +459,7 @@ struct request_queue #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ +#define QUEUE_FLAG_CQ 16 /* hardware does queuing */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -581,6 +582,7 @@ enum { #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) +#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) -- cgit v1.2.3 From 5e605b64a183a6c0e84cdb99a6f8acb1f8200437 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Aug 2009 09:07:21 +0200 Subject: block: add blk-iopoll, a NAPI like approach for block devices This borrows some code from NAPI and implements a polled completion mode for block devices. The idea is the same as NAPI - instead of doing the command completion when the irq occurs, schedule a dedicated softirq in the hopes that we will complete more IO when the iopoll handler is invoked. Devices have a budget of commands assigned, and will stay in polled mode as long as they continue to consume their budget from the iopoll softirq handler. If they do not, the device is set back to interrupt completion mode. This patch holds the core bits for blk-iopoll, device driver support sold separately. Signed-off-by: Jens Axboe --- include/linux/blk-iopoll.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/interrupt.h | 1 + 2 files changed, 42 insertions(+) create mode 100644 include/linux/blk-iopoll.h (limited to 'include/linux') diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h new file mode 100644 index 00000000000..b2e1739a2e7 --- /dev/null +++ b/include/linux/blk-iopoll.h @@ -0,0 +1,41 @@ +#ifndef BLK_IOPOLL_H +#define BLK_IOPOLL_H + +struct blk_iopoll; +typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); + +struct blk_iopoll { + struct list_head list; + unsigned long state; + unsigned long data; + int weight; + int max; + blk_iopoll_fn *poll; +}; + +enum { + IOPOLL_F_SCHED = 0, + IOPOLL_F_DISABLE = 1, +}; + +static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) +{ + return !test_bit(IOPOLL_F_DISABLE, &iop->state) && + !test_and_set_bit(IOPOLL_F_SCHED, &iop->state); +} + +static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) +{ + return test_bit(IOPOLL_F_DISABLE, &iop->state); +} + +extern void blk_iopoll_sched(struct blk_iopoll *); +extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); +extern void blk_iopoll_complete(struct blk_iopoll *); +extern void __blk_iopoll_complete(struct blk_iopoll *); +extern void blk_iopoll_enable(struct blk_iopoll *); +extern void blk_iopoll_disable(struct blk_iopoll *); + +extern int blk_iopoll_enabled; + +#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1e9f3..edd8d5c9039 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -344,6 +344,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, + BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, -- cgit v1.2.3 From d62f843b295393124970d29316344150c7de009b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Aug 2009 08:31:14 +0200 Subject: block: make blk_iopoll_prep_sched() follow normal 0/1 return convention Return 0 if we successfully marked this iopoll structure as ours for scheduling, instead of 1. Signed-off-by: Jens Axboe --- include/linux/blk-iopoll.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h index b2e1739a2e7..308734d3d4a 100644 --- a/include/linux/blk-iopoll.h +++ b/include/linux/blk-iopoll.h @@ -18,10 +18,17 @@ enum { IOPOLL_F_DISABLE = 1, }; +/* + * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating + * that we were the first to acquire this iop for scheduling. If this iop + * is currently disabled, return "failure". + */ static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) { - return !test_bit(IOPOLL_F_DISABLE, &iop->state) && - !test_and_set_bit(IOPOLL_F_SCHED, &iop->state); + if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) + return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); + + return 1; } static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -- cgit v1.2.3 From 01e97f6b897bf06ec83375d691f2f4d57f5b3a09 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Sep 2009 20:06:47 +0200 Subject: block: enable rq CPU completion affinity by default Test results here look good, and on big OLTP runs it has also shown to significantly increase cycles attributed to the database and cause a performance boost. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 98b45633a27..8bf1a10e4d8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -463,7 +463,8 @@ struct request_queue #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ - (1 << QUEUE_FLAG_STACKABLE)) + (1 << QUEUE_FLAG_STACKABLE) | \ + (1 << QUEUE_FLAG_SAME_COMP)) static inline int queue_is_locked(struct request_queue *q) { -- cgit v1.2.3 From a9327cac440be4d8333bba975cbbf76045096275 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Fri, 11 Sep 2009 09:18:54 +0200 Subject: Seperate read and write statistics of in_flight requests Currently, there is a single in_flight counter measuring the number of requests in the request_queue. But some monitoring tools would like to know how many read requests and write requests are in progress. Split the current in_flight counter into two seperate counters for read and write. This information is exported as a sysfs attribute, as changing the currently available stat files would break the existing tools. Signed-off-by: Nikanth Karthikesan Signed-off-by: Jens Axboe --- include/linux/genhd.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 45fc320a53c..44263cb2712 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -98,7 +98,7 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - int in_flight; + int in_flight[2]; #ifdef CONFIG_SMP struct disk_stats *dkstats; #else @@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part) +static inline void part_inc_in_flight(struct hd_struct *part, int rw) { - part->in_flight++; + part->in_flight[rw]++; if (part->partno) - part_to_disk(part)->part0.in_flight++; + part_to_disk(part)->part0.in_flight[rw]++; } -static inline void part_dec_in_flight(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part, int rw) { - part->in_flight--; + part->in_flight[rw]--; if (part->partno) - part_to_disk(part)->part0.in_flight--; + part_to_disk(part)->part0.in_flight[rw]--; +} + +static inline int part_in_flight(struct hd_struct *part) +{ + return part->in_flight[0] + part->in_flight[1]; } /* block/blk-core.c */ @@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_inflight_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); -- cgit v1.2.3 From 3c5820c743479285ce2678fd3c12b1fd39fe998f Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 11 Sep 2009 21:54:52 +0200 Subject: block: Optimal I/O limit wrapper Implement blk_limits_io_opt() and make blk_queue_io_opt() a wrapper around it. DM needs this to avoid poking at the queue_limits directly. Signed-off-by: Martin K. Petersen Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bf1a10e4d8..86c2bdff3b8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -935,6 +935,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); +extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- cgit v1.2.3 From 3d2257f157c2324acbc0fa0fa54e8626a987edd2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 12 Sep 2009 07:35:37 +0200 Subject: Make DISCARD_BARRIER and DISCARD_NOBARRIER writes instead of reads The commands are conceptually writes, and in the case of IDE and SCSI commands actually are writes. They were only reads because we thought that would interact better with the elevators. Now the elevators know about discard requests, that advantage no longer exists. Signed-off-by: David Woodhouse Signed-off-by: Matthew Wilcox Signed-off-by: Jens Axboe --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c1f993515f5..7f5a8ad2500 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -161,8 +161,8 @@ struct inodes_stat_t { * These aren't really reads or writes, they pass down information about * parts of device that are now unused by the file system. */ -#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) -#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) +#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD)) +#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.3 From 746cd1e7e4a555ddaee53b19a46e05c9c61eaf09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 12 Sep 2009 07:35:43 +0200 Subject: block: use blkdev_issue_discard in blk_ioctl_discard blk_ioctl_discard duplicates large amounts of code from blkdev_issue_discard, the only difference between the two is that blkdev_issue_discard needs to send a barrier discard request and blk_ioctl_discard a non-barrier one, and blk_ioctl_discard needs to wait on the request. To facilitates this add a flags argument to blkdev_issue_discard to control both aspects of the behaviour. This will be very useful later on for using the waiting funcitonality for other callers. Based on an earlier patch from Matthew Wilcox . Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86c2bdff3b8..e23a86cae5a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -998,15 +998,18 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); -extern int blkdev_issue_discard(struct block_device *, - sector_t sector, sector_t nr_sects, gfp_t); +#define DISCARD_FL_WAIT 0x01 /* wait for completion */ +#define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */ +extern int blkdev_issue_discard(struct block_device *, sector_t sector, + sector_t nr_sects, gfp_t, int flags); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks) { block <<= (sb->s_blocksize_bits - 9); nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, + DISCARD_FL_BARRIER); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3