From 2a3a59e5c977654d3aad5bc11cc0aca2303a7f44 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 11 Nov 2008 13:42:35 -0600 Subject: [SCSI] Fix hang in starved list processing Close possible infinite loop with interrupts off when devices are added back to the starved list. Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=11898 Reported-by: Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f5d3b96890d..fa45a1a6686 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -567,15 +567,18 @@ static inline int scsi_host_is_busy(struct Scsi_Host *shost) */ static void scsi_run_queue(struct request_queue *q) { - struct scsi_device *starved_head = NULL, *sdev = q->queuedata; + struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost = sdev->host; + LIST_HEAD(starved_list); unsigned long flags; if (scsi_target(sdev)->single_lun) scsi_single_lun_run(sdev); spin_lock_irqsave(shost->host_lock, flags); - while (!list_empty(&shost->starved_list) && !scsi_host_is_busy(shost)) { + list_splice_init(&shost->starved_list, &starved_list); + + while (!list_empty(&starved_list)) { int flagset; /* @@ -588,24 +591,18 @@ static void scsi_run_queue(struct request_queue *q) * scsi_request_fn must get the host_lock before checking * or modifying starved_list or starved_entry. */ - sdev = list_entry(shost->starved_list.next, - struct scsi_device, starved_entry); - /* - * The *queue_ready functions can add a device back onto the - * starved list's tail, so we must check for a infinite loop. - */ - if (sdev == starved_head) + if (scsi_host_is_busy(shost)) break; - if (!starved_head) - starved_head = sdev; + sdev = list_entry(starved_list.next, + struct scsi_device, starved_entry); + list_del_init(&sdev->starved_entry); if (scsi_target_is_busy(scsi_target(sdev))) { list_move_tail(&sdev->starved_entry, &shost->starved_list); continue; } - list_del_init(&sdev->starved_entry); spin_unlock(shost->host_lock); spin_lock(sdev->request_queue->queue_lock); @@ -621,6 +618,8 @@ static void scsi_run_queue(struct request_queue *q) spin_lock(shost->host_lock); } + /* put any unprocessed entries back */ + list_splice(&starved_list, &shost->starved_list); spin_unlock_irqrestore(shost->host_lock, flags); blk_run_queue(q); -- cgit v1.2.3 From bce02614cd1b3d669af1195695e503e818b60fae Mon Sep 17 00:00:00 2001 From: Martin Petermann Date: Wed, 26 Nov 2008 18:07:35 +0100 Subject: [SCSI] zfcp: fix remote port status check For an incoming RSCN it was checked by the ZFCP_STATUS_PORT_DID_DID define to re-open a remote port or to test the connection. Since this define was re-used it was also necessary to replace that define with ZFCP_STATUS_PORT_PHYS_OPEN. Signed-off-by: Martin Petermann Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 1a7c80a77ff..c5f4bd217bf 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -125,8 +125,7 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, read_lock_irqsave(&zfcp_data.config_lock, flags); list_for_each_entry(port, &fsf_req->adapter->port_list_head, list) { - /* FIXME: ZFCP_STATUS_PORT_DID_DID check is racy */ - if (!(atomic_read(&port->status) & ZFCP_STATUS_PORT_DID_DID)) + if (!(atomic_read(&port->status) & ZFCP_STATUS_PORT_PHYS_OPEN)) /* Try to connect to unused ports anyway. */ zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED, -- cgit v1.2.3 From 1c1cba17a9078c83a80a099bc207b208d664a13a Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Wed, 26 Nov 2008 18:07:36 +0100 Subject: [SCSI] zfcp: Fix opening of wka ports Running two wka_port_get calls in parallel could issue two open_port requests, overwriting the port handle. Don't issue an open_port for the state PORT_OPENING, and only read the data from GOOD responses. Signed-off-by: Christof Schmitt Acked-by: Swen Schillig Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fc.c | 3 ++- drivers/s390/scsi/zfcp_fsf.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index c5f4bd217bf..ae151390cb9 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -50,7 +50,8 @@ static int zfcp_wka_port_get(struct zfcp_wka_port *wka_port) if (mutex_lock_interruptible(&wka_port->mutex)) return -ERESTARTSYS; - if (wka_port->status != ZFCP_WKA_PORT_ONLINE) { + if (wka_port->status == ZFCP_WKA_PORT_OFFLINE || + wka_port->status == ZFCP_WKA_PORT_CLOSING) { wka_port->status = ZFCP_WKA_PORT_OPENING; if (zfcp_fsf_open_wka_port(wka_port)) wka_port->status = ZFCP_WKA_PORT_OFFLINE; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index d024442ee12..48bfd304924 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1584,6 +1584,7 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) wka_port->status = ZFCP_WKA_PORT_OFFLINE; break; case FSF_PORT_ALREADY_OPEN: + break; case FSF_GOOD: wka_port->handle = header->port_handle; wka_port->status = ZFCP_WKA_PORT_ONLINE; -- cgit v1.2.3 From 633528c304f20b5c2e3e04d48f620548ce08b12e Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Wed, 26 Nov 2008 18:07:37 +0100 Subject: [SCSI] zfcp: returning an ERR_PTR where a NULL value is expected Aborting a SCSI cmnd might requrie to send a abort_fsf_cmnd. If the creation of this fsf_req fails an ERR_PTR is returned where a NULL value would be expected as an error indicator. This ERR_PTR is dereferenced as valid fsf_req in succeeding processing leading to an error. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 48bfd304924..0343d881bab 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -930,8 +930,10 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_command(unsigned long old_req_id, goto out; req = zfcp_fsf_req_create(adapter, FSF_QTCB_ABORT_FCP_CMND, req_flags, adapter->pool.fsf_req_abort); - if (IS_ERR(req)) + if (IS_ERR(req)) { + req = NULL; goto out; + } if (unlikely(!(atomic_read(&unit->status) & ZFCP_STATUS_COMMON_UNBLOCKED))) @@ -2443,8 +2445,10 @@ struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_adapter *adapter, goto out; req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags, adapter->pool.fsf_req_scsi); - if (IS_ERR(req)) + if (IS_ERR(req)) { + req = NULL; goto out; + } req->status |= ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT; req->data = unit; -- cgit v1.2.3 From 26871c97d52e50dc574bd01967926650643b142a Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Wed, 26 Nov 2008 18:07:38 +0100 Subject: [SCSI] zfcp: verify for correct rport state before scanning for SCSI devs Prevent a SCSI target scan for a rport which have turned invalid in the meantime. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 35364f64da7..c55dd2728f9 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1186,7 +1186,9 @@ static void zfcp_erp_scsi_scan(struct work_struct *work) container_of(work, struct zfcp_erp_add_work, work); struct zfcp_unit *unit = p->unit; struct fc_rport *rport = unit->port->rport; - scsi_scan_target(&rport->dev, 0, rport->scsi_target_id, + + if (rport && rport->port_state == FC_PORTSTATE_ONLINE) + scsi_scan_target(&rport->dev, 0, rport->scsi_target_id, scsilun_to_int((struct scsi_lun *)&unit->fcp_lun), 0); atomic_clear_mask(ZFCP_STATUS_UNIT_SCSI_WORK_PENDING, &unit->status); zfcp_unit_put(unit); -- cgit v1.2.3 From 0ac55aa90f2c3bd08e57e52a513b82b18ce0a5bc Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Wed, 26 Nov 2008 18:07:39 +0100 Subject: [SCSI] zfcp: eliminate race between validation and locking The check of having a valid pointer was performed before the processing was secured by the lock. Between those two steps the pointer can turn invalid. During further processing another value is used (referenced by the pointer described above) as a function pointer which is never verified to be valid either, resulting under some circumstances in an invalid function call. This patch is fixing both issues. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_fsf.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 0343d881bab..dc036769040 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2116,18 +2116,21 @@ static inline void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req) static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req) { - struct scsi_cmnd *scpnt = req->data; + struct scsi_cmnd *scpnt; struct fcp_rsp_iu *fcp_rsp_iu = (struct fcp_rsp_iu *) &(req->qtcb->bottom.io.fcp_rsp); u32 sns_len; char *fcp_rsp_info = (unsigned char *) &fcp_rsp_iu[1]; unsigned long flags; - if (unlikely(!scpnt)) - return; - read_lock_irqsave(&req->adapter->abort_lock, flags); + scpnt = req->data; + if (unlikely(!scpnt)) { + read_unlock_irqrestore(&req->adapter->abort_lock, flags); + return; + } + if (unlikely(req->status & ZFCP_STATUS_FSFREQ_ABORTED)) { set_host_byte(scpnt, DID_SOFT_ERROR); set_driver_byte(scpnt, SUGGEST_RETRY); -- cgit v1.2.3 From fca55b6fb587e42c7761ee30bd1a6c313a9270c9 Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Wed, 26 Nov 2008 18:07:40 +0100 Subject: [SCSI] zfcp: fix deadlock between wq triggered port scan and ERP Waiting for the ERP to be finished in a task running in the global kernel work-queue is a bad idea, especially if the ERP needs to run another job in this work-queue before it can finish. -> deadlock. This patch removes the necessity to wait for a finished ERP from the scan task and moves the job scheduling to the end of the ERP. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 3 ++- drivers/s390/scsi/zfcp_fc.c | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index c55dd2728f9..c557ba34e1a 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -720,7 +720,6 @@ static int zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *act, goto failed_openfcp; atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &act->adapter->status); - schedule_work(&act->adapter->scan_work); return ZFCP_ERP_SUCCEEDED; @@ -1284,6 +1283,8 @@ static void zfcp_erp_action_cleanup(struct zfcp_erp_action *act, int result) case ZFCP_ERP_ACTION_REOPEN_ADAPTER: if (result != ZFCP_ERP_SUCCEEDED) zfcp_erp_rports_del(adapter); + else + schedule_work(&adapter->scan_work); zfcp_adapter_put(adapter); break; } diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index ae151390cb9..8aab3091a7b 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -610,7 +610,6 @@ int zfcp_scan_ports(struct zfcp_adapter *adapter) int ret, i; struct zfcp_gpn_ft *gpn_ft; - zfcp_erp_wait(adapter); /* wait until adapter is finished with ERP */ if (fc_host_port_type(adapter->scsi_host) != FC_PORTTYPE_NPORT) return 0; -- cgit v1.2.3 From f7a65e92e4bef3ccf9e008ed8cd52d914b6a4adb Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Thu, 27 Nov 2008 11:44:07 +0100 Subject: [SCSI] zfcp: prevent double decrement on host_busy while being busy The zfcp_scsi_queuecommand was not acting according to the standard when the respective unit was not available. In this case an -EBUSY was returned, which is not valid in itself, and in addition scsi_done was called. This combination is not allowed and was leading to a double finish of the request and therefor double decrement of the host_busy counter. Signed-off-by: Swen Schillig Signed-off-by: Christof Schmitt Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index e46fd3e9f68..468c880f8b6 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -88,7 +88,7 @@ static int zfcp_scsi_queuecommand(struct scsi_cmnd *scpnt, ret = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, 0, ZFCP_REQ_AUTO_CLEANUP); if (unlikely(ret == -EBUSY)) - zfcp_scsi_command_fail(scpnt, DID_NO_CONNECT); + return SCSI_MLQUEUE_DEVICE_BUSY; else if (unlikely(ret < 0)) return SCSI_MLQUEUE_HOST_BUSY; -- cgit v1.2.3 From 8fbd64e2eeb81d4b94be935a15d6d4829ec62aa2 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 30 Nov 2008 10:15:37 -0600 Subject: [SCSI] aacraid: switch to block timeout aacraid updates the timeout in its slave configure routine if it is too small. This now needs to update the request queue timeout in block. Cc: AACRAID list Signed-off-by: James Bottomley --- drivers/scsi/aacraid/linit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 9aa301c1ed0..162cd927d94 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -427,8 +427,8 @@ static int aac_slave_configure(struct scsi_device *sdev) * Firmware has an individual device recovery time typically * of 35 seconds, give us a margin. */ - if (sdev->timeout < (45 * HZ)) - sdev->timeout = 45 * HZ; + if (sdev->request_queue->rq_timeout < (45 * HZ)) + blk_queue_rq_timeout(sdev->request_queue, 45*HZ); for (cid = 0; cid < aac->maximum_num_containers; ++cid) if (aac->fsa_dev[cid].valid) ++num_lsu; -- cgit v1.2.3 From 97b5648a8bc2aef980645ee39d31bba0933a6112 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 30 Nov 2008 10:20:37 -0600 Subject: [SCSI] ibmvscsi: switch to block timeout ibmvscsi sets the timeout in its slave configure routine for disk devices. This now needs to update the request queue timeout in block. Cc: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 87e09f35d3d..6cad1758243 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1442,7 +1442,7 @@ static int ibmvscsi_slave_configure(struct scsi_device *sdev) spin_lock_irqsave(shost->host_lock, lock_flags); if (sdev->type == TYPE_DISK) { sdev->allow_restart = 1; - sdev->timeout = 60 * HZ; + blk_queue_rq_timeout(sdev->request_queue, 60 * HZ); } scsi_adjust_queue_depth(sdev, 0, shost->cmd_per_lun); spin_unlock_irqrestore(shost->host_lock, lock_flags); -- cgit v1.2.3 From ee1ab9e945803a607ba5c2a96b69e4820a806552 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 30 Nov 2008 10:27:51 -0600 Subject: [SCSI] megaraid_sas: switch to block timeout megaraid_sas sets the timeout in its slave configure routine for devices on special channels. This now needs to update the request queue timeout in block. Cc: "Yang, Bo" Signed-off-by: James Bottomley --- drivers/scsi/megaraid/megaraid_sas.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index a454f94623d..17ce7abe17e 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -1016,7 +1016,8 @@ static int megasas_slave_configure(struct scsi_device *sdev) * The RAID firmware may require extended timeouts. */ if (sdev->channel >= MEGASAS_MAX_PD_CHANNELS) - sdev->timeout = MEGASAS_DEFAULT_CMD_TIMEOUT * HZ; + blk_queue_rq_timeout(sdev->request_queue, + MEGASAS_DEFAULT_CMD_TIMEOUT * HZ); return 0; } -- cgit v1.2.3 From 9728c0814ecb505546696a659858fdb761375544 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 30 Nov 2008 10:32:26 -0600 Subject: [SCSI] make scsi_eh_try_stu use block timeout scsi_eh_try_stu() was still using the timeout parameter in the device which is now not set (i.e. zero filled) meaning that it waited no time at all for the start unit command to complete (leading the routine to conclude failure every time). This lead to a 2.6.27 regression: http://bugzilla.kernel.org/show_bug.cgi?id=12120 Where firewire devices that were non spec compliant wouldn't spin up. Fix this by using the block queue timeout value instead. Reported-by: Stefan Richter Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 386361778eb..edfaf241c5b 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -932,8 +932,7 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd) int i, rtn = NEEDS_RETRY; for (i = 0; rtn == NEEDS_RETRY && i < 2; i++) - rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, - scmd->device->timeout, 0); + rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, scmd->device->request_queue->rq_timeout, 0); if (rtn == SUCCESS) return 0; -- cgit v1.2.3 From dc5c49bff34e5b5a4334560dc7f7dfeae91d8962 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 30 Nov 2008 10:38:08 -0600 Subject: [SCSI] stex: switch to block timeout stex sets the timeout in its slave configure routine for all devices. This now needs to update the request queue timeout in block. Cc: Ed Lin Signed-off-by: James Bottomley --- drivers/scsi/stex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 3790906a77d..2fa830c0be2 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -477,7 +477,7 @@ stex_slave_config(struct scsi_device *sdev) { sdev->use_10_for_rw = 1; sdev->use_10_for_ms = 1; - sdev->timeout = 60 * HZ; + blk_queue_rq_timeout(sdev->request_queue, 60 * HZ); sdev->tagged_supported = 1; return 0; -- cgit v1.2.3