From 5d1a536621403b442eef60ddd35e6b7a46fb04b7 Mon Sep 17 00:00:00 2001
From: Aaron Carroll
Date: Tue, 30 Oct 2007 10:40:12 +0100
Subject: Deadline iosched: Factor out finding latter request

Factor finding the next request in sector-sorted order into a function,
deadline_latter_request().

Signed-off-by: Aaron Carroll
Signed-off-by: Jens Axboe
---
 block/deadline-iosched.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 1a511ffaf8a..a44437e5a94 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -55,6 +55,20 @@ static void deadline_move_request(struct deadline_data *, struct request *);
 
 #define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))])
 
+/*
+ * get the request after `rq' in sector-sorted order
+ */
+static inline struct request *
+deadline_latter_request(struct request *rq)
+{
+        struct rb_node *node = rb_next(&rq->rb_node);
+
+        if (node)
+                return rb_entry_rq(node);
+
+        return NULL;
+}
+
 static void
 deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
 {
@@ -74,13 +88,8 @@ deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
 {
         const int data_dir = rq_data_dir(rq);
 
-        if (dd->next_rq[data_dir] == rq) {
-                struct rb_node *rbnext = rb_next(&rq->rb_node);
-
-                dd->next_rq[data_dir] = NULL;
-                if (rbnext)
-                        dd->next_rq[data_dir] = rb_entry_rq(rbnext);
-        }
+        if (dd->next_rq[data_dir] == rq)
+                dd->next_rq[data_dir] = deadline_latter_request(rq);
 
         elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
 }
@@ -198,14 +207,11 @@ static void
 deadline_move_request(struct deadline_data *dd, struct request *rq)
 {
         const int data_dir = rq_data_dir(rq);
-        struct rb_node *rbnext = rb_next(&rq->rb_node);
 
         dd->next_rq[READ] = NULL;
         dd->next_rq[WRITE] = NULL;
+        dd->next_rq[data_dir] = deadline_latter_request(rq);
 
-        if (rbnext)
-                dd->next_rq[data_dir] = rb_entry_rq(rbnext);
-
         dd->last_sector = rq->sector + rq->nr_sectors;
 
         /*
--
cgit v1.2.3

From dfb3d72a9aa519672c9ae06f0d2f93eccb35482f Mon Sep 17 00:00:00 2001
From: Aaron Carroll
Date: Tue, 30 Oct 2007 10:40:13 +0100
Subject: Deadline iosched: Reset batch for ordered requests

The deadline I/O scheduler does not reset the batch count when starting
a new batch at a higher-sectored request. This means the second and
subsequent batches in the same data direction will never exceed a single
request in size whenever higher-sectored requests are pending.

This patch gives new batches in the same data direction as old ones
their full quota of requests by resetting the batch count.

Signed-off-by: Aaron Carroll
Signed-off-by: Jens Axboe
---
 block/deadline-iosched.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index a44437e5a94..cb94c838087 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -306,12 +306,11 @@ dispatch_writes:
 dispatch_find_request:
         /*
          * we are not running a batch, find best request for selected data_dir
+         * and start a new batch
          */
         if (deadline_check_fifo(dd, data_dir)) {
                 /* An expired request exists - satisfy it */
-                dd->batching = 0;
                 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
-
         } else if (dd->next_rq[data_dir]) {
                 /*
                  * The last req was the same dir and we have a next request in
                  * sort order. No expired requests so continue on from here.
                  */
                 rq = dd->next_rq[data_dir];
         } else {
                 struct rb_node *node;
                 /*
                  * The last req was the other direction or we have run out of
                  * higher-sectored requests. Go back to the lowest sectored
                  * request (1 way elevator) and start a new batch.
                  */
-                dd->batching = 0;
                 node = rb_first(&dd->sort_list[data_dir]);
                 if (node)
                         rq = rb_entry_rq(node);
         }
 
+        dd->batching = 0;
+
 dispatch_request:
         /*
          * rq is the selected appropriate request.
--
cgit v1.2.3

From 6f5d8aa6382eef2b26032c88656270bdae7f0c42 Mon Sep 17 00:00:00 2001
From: Aaron Carroll
Date: Tue, 30 Oct 2007 10:40:13 +0100
Subject: Deadline iosched: Fix batching fairness

After switching data directions, deadline always starts the next batch
from the lowest-sector request. This gives excessive deadline expiries
and large latency and throughput disparity between high- and low-sector
requests; an order of magnitude in some tests.

This patch changes the batching behaviour so new batches start from the
request whose expiry is earliest.

Signed-off-by: Aaron Carroll
Signed-off-by: Jens Axboe
---
 block/deadline-iosched.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index cb94c838087..a054eef8dff 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -306,27 +306,20 @@ dispatch_writes:
 dispatch_find_request:
         /*
          * we are not running a batch, find best request for selected data_dir
-         * and start a new batch
          */
-        if (deadline_check_fifo(dd, data_dir)) {
-                /* An expired request exists - satisfy it */
+        if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
+                /*
+                 * A deadline has expired, the last request was in the other
+                 * direction, or we have run out of higher-sectored requests.
+                 * Start again from the request with the earliest expiry time.
+                 */
                 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
-        } else if (dd->next_rq[data_dir]) {
+        } else {
                 /*
                  * The last req was the same dir and we have a next request in
                  * sort order. No expired requests so continue on from here.
                  */
                 rq = dd->next_rq[data_dir];
-        } else {
-                struct rb_node *node;
-                /*
-                 * The last req was the other direction or we have run out of
-                 * higher-sectored requests. Go back to the lowest sectored
-                 * request (1 way elevator) and start a new batch.
-                 */
-                node = rb_first(&dd->sort_list[data_dir]);
-                if (node)
-                        rq = rb_entry_rq(node);
         }
 
         dd->batching = 0;
--
cgit v1.2.3

From 5ec140e600b7d6624c657f008833f0e71bd5ef48 Mon Sep 17 00:00:00 2001
From: Vasily Averin
Date: Wed, 31 Oct 2007 08:33:24 +0100
Subject: dm: bounce_pfn limit added

Device mapper uses its own bounce_pfn that may differ from the one on the
underlying device. As a result, dm can build incorrect requests that contain
sg elements greater than the underlying device is able to handle.

This is the cause of slab corruption in the i2o layer, observed on i386,
when very long direct IO requests are addressed to a dm-over-i2o device.

Signed-off-by: Vasily Averin
Cc:
Cc: Alasdair G Kergon
Signed-off-by: Andrew Morton
Signed-off-by: Jens Axboe
---
 drivers/md/dm-table.c         | 7 +++++++
 include/linux/device-mapper.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8939e610508..5a7eb650181 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -102,6 +102,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
         lhs->seg_boundary_mask =
                 min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
 
+        lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
+
         lhs->no_cluster |= rhs->no_cluster;
 }
 
@@ -566,6 +568,8 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
                 min_not_zero(rs->seg_boundary_mask,
                              q->seg_boundary_mask);
 
+        rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
+
         rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
@@ -707,6 +711,8 @@ static void check_for_valid_limits(struct io_restrictions *rs)
                 rs->max_segment_size = MAX_SEGMENT_SIZE;
         if (!rs->seg_boundary_mask)
                 rs->seg_boundary_mask = -1;
+        if (!rs->bounce_pfn)
+                rs->bounce_pfn = -1;
 }
 
 int dm_table_add_target(struct dm_table *t, const char *type,
@@ -891,6 +897,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
         q->hardsect_size = t->limits.hardsect_size;
         q->max_segment_size = t->limits.max_segment_size;
         q->seg_boundary_mask = t->limits.seg_boundary_mask;
+        q->bounce_pfn = t->limits.bounce_pfn;
         if (t->limits.no_cluster)
                 q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
         else
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 37c66d1254b..b8b7c51389f 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -116,6 +116,7 @@ struct io_restrictions {
         unsigned short          hardsect_size;
         unsigned int            max_segment_size;
         unsigned long           seg_boundary_mask;
+        unsigned long           bounce_pfn;
         unsigned char           no_cluster; /* inverted so that 0 is default */
 };
 
--
cgit v1.2.3

From 51fd77bd9f512ab6cc9df0733ba1caaab89eb957 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 2 Nov 2007 08:49:08 +0100
Subject: [BLOCK] Don't allow empty barriers to be passed down to queues that don't grok them

Signed-off-by: Jens Axboe
---
 block/ll_rw_blk.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 56f2646612e..3e4e8bfbe85 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3221,6 +3221,7 @@ static inline void __generic_make_request(struct bio *bio)
         sector_t old_sector;
         int ret, nr_sectors = bio_sectors(bio);
         dev_t old_dev;
+        int err = -EIO;
 
         might_sleep();
 
@@ -3248,7 +3249,7 @@ static inline void __generic_make_request(struct bio *bio)
                                 bdevname(bio->bi_bdev, b),
                                 (long long) bio->bi_sector);
 end_io:
-                        bio_endio(bio, -EIO);
+                        bio_endio(bio, err);
                         break;
                 }
 
@@ -3283,6 +3284,10 @@ end_io:
 
                 if (bio_check_eod(bio, nr_sectors))
                         goto end_io;
+                if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+                        err = -EOPNOTSUPP;
+                        goto end_io;
+                }
 
                 ret = q->make_request_fn(q, bio);
         } while (ret);
--
cgit v1.2.3
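
Note on the bounce_pfn patch above: it relies on a min_not_zero()-style combination of limits,
where a value of zero means "not yet set" and otherwise the stricter (smaller) limit wins. The
following standalone C sketch only illustrates that behaviour; the helper is re-implemented
locally for the example and the sample limit values are invented, so it is not the kernel's
actual macro or data.

/*
 * Standalone illustration (not kernel code) of combining bounce_pfn
 * limits the way the dm-table changes above do: zero means "unset",
 * otherwise the smaller of the two non-zero limits is kept.
 */
#include <stdio.h>

static unsigned long min_not_zero(unsigned long a, unsigned long b)
{
        if (a == 0)
                return b;
        if (b == 0)
                return a;
        return a < b ? a : b;
}

int main(void)
{
        unsigned long dm_limit = 0;         /* dm: no restriction recorded yet */
        unsigned long dev_limit = 1048576;  /* hypothetical underlying device limit */

        dm_limit = min_not_zero(dm_limit, dev_limit);
        printf("combined bounce_pfn: %lu\n", dm_limit);  /* prints 1048576 */

        /* A second, stricter device tightens the combined limit further. */
        dm_limit = min_not_zero(dm_limit, 65536);
        printf("combined bounce_pfn: %lu\n", dm_limit);  /* prints 65536 */
        return 0;
}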