From 18e144d32cd3dae6953c385e4b376ef9688b61b0 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 27 May 2005 15:04:47 -0700
Subject: [SCSI] qla2xxx: fix bad locking during eh_abort

Correct incorrect locking order in qla2xxx_eh_abort() handler which
would case a hang during certain code-paths.

With extra pieces to fix the irq state in the locks.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 579448222d6..3c97aa45772 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -507,6 +507,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 	int ret, i;
 	unsigned int id, lun;
 	unsigned long serial;
+	unsigned long flags;
 
 	if (!CMD_SP(cmd))
 		return FAILED;
@@ -519,7 +520,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 
 	/* Check active list for command command. */
 	spin_unlock_irq(ha->host->host_lock);
-	spin_lock(&ha->hardware_lock);
+	spin_lock_irqsave(&ha->hardware_lock, flags);
 	for (i = 1; i < MAX_OUTSTANDING_COMMANDS; i++) {
 		sp = ha->outstanding_cmds[i];
 
@@ -534,7 +535,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 		    sp->state));
 		DEBUG3(qla2x00_print_scsi_cmd(cmd);)
 
-		spin_unlock(&ha->hardware_lock);
+		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		if (qla2x00_abort_command(ha, sp)) {
 			DEBUG2(printk("%s(%ld): abort_command "
 			    "mbx failed.\n", __func__, ha->host_no));
@@ -543,20 +544,19 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 			    "mbx success.\n", __func__, ha->host_no));
 			ret = SUCCESS;
 		}
-		spin_lock(&ha->hardware_lock);
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 
 		break;
 	}
+	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	/* Wait for the command to be returned. */
 	if (ret == SUCCESS) {
-		spin_unlock(&ha->hardware_lock);
 		if (qla2x00_eh_wait_on_command(ha, cmd) != QLA_SUCCESS) {
 			qla_printk(KERN_ERR, ha, 
 			    "scsi(%ld:%d:%d): Abort handler timed out -- %lx "
 			    "%x.\n", ha->host_no, id, lun, serial, ret);
 		}
-		spin_lock(&ha->hardware_lock);
 	}
 	spin_lock_irq(ha->host->host_lock);
 
@@ -588,6 +588,7 @@ qla2x00_eh_wait_for_pending_target_commands(scsi_qla_host_t *ha, unsigned int t)
 	int	status;
 	srb_t		*sp;
 	struct scsi_cmnd *cmd;
+	unsigned long flags;
 
 	status = 0;
 
@@ -596,11 +597,11 @@ qla2x00_eh_wait_for_pending_target_commands(scsi_qla_host_t *ha, unsigned int t)
 	 * array
 	 */
 	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
-		spin_lock(&ha->hardware_lock);
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 		sp = ha->outstanding_cmds[cnt];
 		if (sp) {
 			cmd = sp->cmd;
-			spin_unlock(&ha->hardware_lock);
+			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 			if (cmd->device->id == t) {
 				if (!qla2x00_eh_wait_on_command(ha, cmd)) {
 					status = 1;
@@ -608,7 +609,7 @@ qla2x00_eh_wait_for_pending_target_commands(scsi_qla_host_t *ha, unsigned int t)
 				}
 			}
 		} else {
-			spin_unlock(&ha->hardware_lock);
+			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		}
 	}
 	return (status);
@@ -740,6 +741,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *ha)
 	int	status;
 	srb_t		*sp;
 	struct scsi_cmnd *cmd;
+	unsigned long flags;
 
 	status = 1;
 
@@ -748,17 +750,17 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *ha)
 	 * array
 	 */
 	for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) {
-		spin_lock(&ha->hardware_lock);
+		spin_lock_irqsave(&ha->hardware_lock, flags);
 		sp = ha->outstanding_cmds[cnt];
 		if (sp) {
 			cmd = sp->cmd;
-			spin_unlock(&ha->hardware_lock);
+			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 			status = qla2x00_eh_wait_on_command(ha, cmd);
 			if (status == 0)
 				break;
 		}
 		else {
-			spin_unlock(&ha->hardware_lock);
+			spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		}
 	}
 	return (status);
-- 
cgit v1.2.3


From c92715b3c22e94105a8fd9e4a23047d05c5077e7 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <ntl@pobox.com>
Date: Thu, 2 Jun 2005 17:15:09 -0500
Subject: [SCSI] fix slab corruption during ipr probe

With CONFIG_DEBUG_SLAB=y I see slab corruption messages during boot on
pSeries machines with IPR adapters with any 2.6.12-rc kernel.

The change which seems to have introduced the problem is "SCSI: revamp
target scanning routines" and may be found at:
http://marc.theaimsgroup.com/?l=bk-commits-head&m=111093946426333&w=2

In order to revert that in a 2.6.12-rc1 tree, I had to revert "target
code updates to support scanned targets" first:
http://marc.theaimsgroup.com/?l=bk-commits-head&m=111094132524649&w=2

With both patches reverted, the corruption messages go away.

ipr: IBM Power RAID SCSI Device Driver version: 2.0.13 (February 21,
2005)
ipr 0001:d0:01.0: Found IOA with IRQ: 167
ipr 0001:d0:01.0: Starting IOA initialization sequence.
ipr 0001:d0:01.0: Adapter firmware version: 020A005C
ipr 0001:d0:01.0: IOA initialized.
scsi0 : IBM 570B Storage Adapter
  Vendor: IBM       Model: VSBPD4E1  U4SCSI  Rev: 4770
  Type:   Enclosure                          ANSI SCSI revision: 02
  Vendor: IBM   H0  Model: HUS103036FL3800   Rev: RPQF
  Type:   Direct-Access                      ANSI SCSI revision: 04
  Vendor: IBM   H0  Model: HUS103036FL3800   Rev: RPQF
  Type:   Direct-Access                      ANSI SCSI revision: 04
  Vendor: IBM   H0  Model: HUS103036FL3800   Rev: RPQF
  Type:   Direct-Access                      ANSI SCSI revision: 04
  Vendor: IBM   H0  Model: HUS103036FL3800   Rev: RPQF
  Type:   Direct-Access                      ANSI SCSI revision: 04
  Vendor: IBM       Model: VSBPD4E1  U4SCSI  Rev: 4770
  Type:   Enclosure                          ANSI SCSI revision: 02
Slab corruption: start=c0000001e8de5268, len=512
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<c00000000029c3a0>](.scsi_target_dev_release+0x28/0x50)
080: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6a
Prev obj: start=c0000001e8de5050, len=512
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<0000000000000000>](0x0)
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
Next obj: start=c0000001e8de5480, len=512
Redzone: 0x170fc2a5/0x170fc2a5.
Last user: [<c000000000228d7c>](.as_init_queue+0x5c/0x228)
000: c0 00 00 01 e8 83 26 08 00 00 00 00 00 00 00 00
010: 00 00 00 00 00 00 00 00 c0 00 00 01 e8 de 54 98
Slab corruption: start=c0000001e8de5268, len=512
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<c00000000029c3a0>](.scsi_target_dev_release+0x28/0x50)
080: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6a
Prev obj: start=c0000001e8de5050, len=512
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<0000000000000000>](0x0)
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
Next obj: start=c0000001e8de5480, len=512
Redzone: 0x170fc2a5/0x170fc2a5.
Last user: [<c000000000228d7c>](.as_init_queue+0x5c/0x228)
000: c0 00 00 01 e8 83 26 08 00 00 00 00 00 00 00 00
010: 00 00 00 00 00 00 00 00 c0 00 00 01 e8 de 54 98
...

I did some digging and the problem seems to be a refcounting issue in
__scsi_add_device.  The target gets freed in scsi_target_reap, and
then __scsi_add_device tries to do another device_put on it.

Signed-off-by: Nathan Lynch <ntl@pobox.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_scan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index cca772624ae..8d0d302844a 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1197,6 +1197,7 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 	if (!starget)
 		return ERR_PTR(-ENOMEM);
 
+	get_device(&starget->dev);
 	down(&shost->scan_mutex);
 	res = scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata);
 	if (res != SCSI_SCAN_LUN_PRESENT)
-- 
cgit v1.2.3