From b59e64d0ddb756af57ea032383bfd393a286a8e8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 2 Jul 2009 22:02:06 +0200 Subject: cciss: Ignore stale commands after reboot When doing an unexpected shutdown like kexec the cciss firmware might still have some commands in flight, which it is trying to complete. The driver is doing it's best on resetting the HBA, but sadly there's a firmware issue causing the firmware _not_ to abort or drop old commands. So the firmware will send us commands which we haven't accounted for, causing the driver to panic. With this patch we're just ignoring these commands as there is nothing we could be doing with them anyway. Signed-off-by: Hannes Reinecke Acked-by: Mike Miller Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 15 +++++++++++++-- drivers/block/cciss_cmd.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index c7a527c08a0..65a0655e7fc 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -226,8 +226,18 @@ static inline void addQ(struct hlist_head *list, CommandList_struct *c) static inline void removeQ(CommandList_struct *c) { - if (WARN_ON(hlist_unhashed(&c->list))) + /* + * After kexec/dump some commands might still + * be in flight, which the firmware will try + * to complete. Resetting the firmware doesn't work + * with old fw revisions, so we have to mark + * them off as 'stale' to prevent the driver from + * falling over. + */ + if (WARN_ON(hlist_unhashed(&c->list))) { + c->cmd_type = CMD_MSG_STALE; return; + } hlist_del_init(&c->list); } @@ -4246,7 +4256,8 @@ static void fail_all_cmds(unsigned long ctlr) while (!hlist_empty(&h->cmpQ)) { c = hlist_entry(h->cmpQ.first, CommandList_struct, list); removeQ(c); - c->err_info->CommandStatus = CMD_HARDWARE_ERR; + if (c->cmd_type != CMD_MSG_STALE) + c->err_info->CommandStatus = CMD_HARDWARE_ERR; if (c->cmd_type == CMD_RWREQ) { complete_command(h, c, 0); } else if (c->cmd_type == CMD_IOCTL_PEND) diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index cd665b00c7c..dbaed1ea0da 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -274,6 +274,7 @@ typedef struct _ErrorInfo_struct { #define CMD_SCSI 0x03 #define CMD_MSG_DONE 0x04 #define CMD_MSG_TIMEOUT 0x05 +#define CMD_MSG_STALE 0xff /* This structure needs to be divisible by 8 for new * indexing method. -- cgit v1.2.3 From ab0fd1debe730ec9998678a0c53caefbd121ed10 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 12:56:18 +0200 Subject: block: don't merge requests of different failfast settings Block layer used to merge requests and bios with different failfast settings. This caused regular IOs to fail prematurely when they were merged into failfast requests for readahead. Niel Lambrechts could trigger the problem semi-reliably on ext4 when resuming from STR. ext4 uses readahead when reading inodes and combined with the deterministic extra SATA PHY exception cycle during resume on the specific configuration, non-readahead inode read would fail causing ext4 errors. Please read the following thread for details. http://lkml.org/lkml/2009/5/23/21 This patch makes block layer reject merging if the failfast settings don't match. This is correct but likely to lower IO performance by preventing regular IOs from mingling into surrounding readahead requests. Changes to allow such mixed merges and handle errors correctly will be added later. Signed-off-by: Tejun Heo Reported-by: Niel Lambrechts Cc: Theodore Tso Signed-off-by: Jens Axboe --- block/blk-merge.c | 6 ++++++ block/elevator.c | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/block/blk-merge.c b/block/blk-merge.c index 39ce64432ba..e1999679a4d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -350,6 +350,12 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (blk_integrity_rq(req) != blk_integrity_rq(next)) return 0; + /* don't merge requests of different failfast settings */ + if (blk_failfast_dev(req) != blk_failfast_dev(next) || + blk_failfast_transport(req) != blk_failfast_transport(next) || + blk_failfast_driver(req) != blk_failfast_driver(next)) + return 0; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn diff --git a/block/elevator.c b/block/elevator.c index ca861927ba4..6f2375339a9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -100,6 +100,14 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_integrity(bio) != blk_integrity_rq(rq)) return 0; + /* + * Don't merge if failfast settings don't match + */ + if (bio_failfast_dev(bio) != blk_failfast_dev(rq) || + bio_failfast_transport(bio) != blk_failfast_transport(rq) || + bio_failfast_driver(bio) != blk_failfast_driver(rq)) + return 0; + if (!elv_iosched_allow_merge(rq, bio)) return 0; -- cgit v1.2.3