From 6000a368cd8e6da1caf101411bdb494cd6fb8b09 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 19 Aug 2008 18:45:30 -0500
Subject: [SCSI] block: separate failfast into multiple bits.

Multipath is best at handling transport errors. If it gets a device
error then there is not much the multipath layer can do. It will just
access the same device but from a different path.

This patch breaks up failfast into device, transport and driver errors.
The multipath layers (md and dm mutlipath) only ask the lower levels to
fast fail transport errors. The user of failfast, read ahead, will ask
to fast fail on all errors.

Note that blk_noretry_request will return true if any failfast bit
is set. This allows drivers that do not support the multipath failfast
bits to continue to fail on any failfast error like before. Drivers
like scsi that are able to fail fast specific errors can check
for the specific fail fast type. In the next patch I will convert
scsi.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/linux/bio.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux/bio.h')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ff5b4cf9e2d..1beda208cbf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -129,25 +129,30 @@ struct bio {
  * bit 2 -- barrier
  *	Insert a serialization point in the IO queue, forcing previously
  *	submitted IO to be completed before this oen is issued.
- * bit 3 -- fail fast, don't want low level driver retries
- * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
+ * bit 3 -- synchronous I/O hint: the block layer will unplug immediately
  *	Note that this does NOT indicate that the IO itself is sync, just
  *	that the block layer will not postpone issue of this IO by plugging.
- * bit 5 -- metadata request
+ * bit 4 -- metadata request
  *	Used for tracing to differentiate metadata and data IO. May also
  *	get some preferential treatment in the IO scheduler
- * bit 6 -- discard sectors
+ * bit 5 -- discard sectors
  *	Informs the lower level device that this range of sectors is no longer
  *	used by the file system and may thus be freed by the device. Used
  *	for flash based storage.
+ * bit 6 -- fail fast device errors
+ * bit 7 -- fail fast transport errors
+ * bit 8 -- fail fast driver errors
+ *	Don't want driver retries for any fast fail whatever the reason.
  */
 #define BIO_RW		0	/* Must match RW in req flags (blkdev.h) */
 #define BIO_RW_AHEAD	1	/* Must match FAILFAST in req flags */
 #define BIO_RW_BARRIER	2
-#define BIO_RW_FAILFAST	3
-#define BIO_RW_SYNC	4
-#define BIO_RW_META	5
-#define BIO_RW_DISCARD	6
+#define BIO_RW_SYNC	3
+#define BIO_RW_META	4
+#define BIO_RW_DISCARD	5
+#define BIO_RW_FAILFAST_DEV		6
+#define BIO_RW_FAILFAST_TRANSPORT	7
+#define BIO_RW_FAILFAST_DRIVER		8
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -174,7 +179,10 @@ struct bio {
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 #define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
-#define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
+#define bio_failfast_dev(bio)	((bio)->bi_rw &	(1 << BIO_RW_FAILFAST_DEV))
+#define bio_failfast_transport(bio)	\
+	((bio)->bi_rw & (1 << BIO_RW_FAILFAST_TRANSPORT))
+#define bio_failfast_driver(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST_DRIVER))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
 #define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
 #define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))
-- 
cgit v1.2.3


From 8677142710516d986d932d6f1fba7be8382c1fec Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 13 Oct 2008 14:19:05 +0200
Subject: block: fix nr_phys_segments miscalculation bug

This fixes the bug reported by Nikanth Karthikesan <knikanth@suse.de>:

http://lkml.org/lkml/2008/10/2/203

The root cause of the bug is that blk_phys_contig_segment
miscalculates q->max_segment_size.

blk_phys_contig_segment checks:

req->biotail->bi_size + next_req->bio->bi_size > q->max_segment_size

But blk_recalc_rq_segments might expect that req->biotail and the
previous bio in the req are supposed be merged into one
segment. blk_recalc_rq_segments might also expect that next_req->bio
and the next bio in the next_req are supposed be merged into one
segment. In such case, we merge two requests that can't be merged
here. Later, blk_rq_map_sg gives more segments than it should.

We need to keep track of segment size in blk_recalc_rq_segments and
use it to see if two requests can be merged. This patch implements it
in the similar way that we used to do for hw merging (virtual
merging).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux/bio.h')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ff5b4cf9e2d..dc3cec386a9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -79,6 +79,13 @@ struct bio {
 
 	unsigned int		bi_size;	/* residual I/O count */
 
+	/*
+	 * To keep track of the max segment size, we account for the
+	 * sizes of the first and last mergeable segments in this bio.
+	 */
+	unsigned int		bi_seg_front_size;
+	unsigned int		bi_seg_back_size;
+
 	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
 
 	unsigned int		bi_comp_cpu;	/* completion CPU */
-- 
cgit v1.2.3