From d0076f7754dce07c7a1d752034561acadd99eafa Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 15 Nov 2007 13:57:08 +0100 Subject: [SCSI] zfcp: fix dismissal of error recovery actions zfcp_erp_action_dismiss() used to ignore any actions in the ready list. This is a bug. Any action superseded by a stronger action needs to be dismissed. This patch changes zfcp_erp_action_dismiss() so that it dismisses actions regardless of their list affiliation. The ERP thread is able to handle this. It is important to kick the erp thread only for actions in the running list, though, as an imbalance of wakeup signals would confuse the erp thread otherwise. Signed-off-by: Martin Peschke Acked-by: Swen Schillig Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 5552b755c08..ad5b481ddaf 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -977,7 +977,9 @@ static void zfcp_erp_action_dismiss(struct zfcp_erp_action *erp_action) debug_text_event(adapter->erp_dbf, 2, "a_adis"); debug_event(adapter->erp_dbf, 2, &erp_action->action, sizeof (int)); - zfcp_erp_async_handler_nolock(erp_action, ZFCP_STATUS_ERP_DISMISSED); + erp_action->status |= ZFCP_STATUS_ERP_DISMISSED; + if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) + zfcp_erp_action_ready(erp_action); } int -- cgit v1.2.3 From 86e8dfc5603ed76917eed0a9dd9e85a1e1a8b162 Mon Sep 17 00:00:00 2001 From: Martin Peschke Date: Thu, 15 Nov 2007 13:57:17 +0100 Subject: [SCSI] zfcp: fix cleanup of dismissed error recovery actions Calling zfcp_erp_strategy_check_action() after zfcp_erp_action_to_running() in zfcp_erp_strategy() might cause an unbalanced up() for erp_ready_sem, which makes the zfcp recovery fail somewhere along the way: erp thread processing erp_action: | | someone waking up erp thread for erp_action | | | | someone else dismissing erp_action: | | | V V V write_lock_irqsave(&adapter->erp_lock, flags); ... if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* first up() for erp_action */ } write_unlock_irqrestore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); ... zfcp_erp_action_to_running(erp_action); write_unlock_restore(&adapter->erp_lock, flags); /* processing erp_action */ write_lock_irqsave(&adapter->erp_lock, flags); ... erp_action->status |= ZFCP_STATUS_ERP_DISMISSED; if (zfcp_erp_action_exists(erp_action) == ZFCP_ERP_ACTION_RUNNING) { zfcp_erp_action_to_ready(erp_action); up(&adapter->erp_ready_sem); /* second, unbalanced up() for erp_action */ } ... write_unlock_restore(&adapter->erp_lock, flags); write_lock_irqsave(&adapter->erp_lock, flags); if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) { zfcp_erp_action_dequeue(erp_action); retval = ZFCP_ERP_DISMISSED; } ... write_unlock_restore(&adapter->erp_lock, flags); down(&adapter->erp_ready_sem); /* this down() is meant to balance the first up() */ The erp thread must not dismiss an erp_action after moving that action to erp_running_head. Instead it should just go through the down() operation, which balances the first up(), and run through zfcp_erp_strategy one more time for the second up(), which eventually cleans up erp_action. Which is similar to the normal processing of an event for erp_action doing something asynchronously (e.g. waiting for the completion of an fsf_req). This only works if we make sure that a dismissed erp_action is passed to zfcp_erp_strategy() prior to the other action, which caused actions to be dismissed. Therefore the patch implements this rule: running actions go to the head of the ready list; new actions go to the tail of the ready list; the erp thread picks actions to be processed from the ready list's head. Signed-off-by: Martin Peschke Acked-by: Swen Schillig Signed-off-by: James Bottomley --- drivers/s390/scsi/zfcp_erp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index ad5b481ddaf..07fa824d179 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1065,7 +1065,7 @@ zfcp_erp_thread(void *data) &adapter->status)) { write_lock_irqsave(&adapter->erp_lock, flags); - next = adapter->erp_ready_head.prev; + next = adapter->erp_ready_head.next; write_unlock_irqrestore(&adapter->erp_lock, flags); if (next != &adapter->erp_ready_head) { @@ -1155,15 +1155,13 @@ zfcp_erp_strategy(struct zfcp_erp_action *erp_action) /* * check for dismissed status again to avoid follow-up actions, - * failing of targets and so on for dismissed actions + * failing of targets and so on for dismissed actions, + * we go through down() here because there has been an up() */ - retval = zfcp_erp_strategy_check_action(erp_action, retval); + if (erp_action->status & ZFCP_STATUS_ERP_DISMISSED) + retval = ZFCP_ERP_CONTINUES; switch (retval) { - case ZFCP_ERP_DISMISSED: - /* leave since this action has ridden to its ancestors */ - debug_text_event(adapter->erp_dbf, 6, "a_st_dis2"); - goto unlock; case ZFCP_ERP_NOMEM: /* no memory to continue immediately, let it sleep */ if (!(erp_action->status & ZFCP_STATUS_ERP_LOWMEM)) { @@ -3091,7 +3089,7 @@ zfcp_erp_action_enqueue(int action, ++adapter->erp_total_count; /* finally put it into 'ready' queue and kick erp thread */ - list_add(&erp_action->list, &adapter->erp_ready_head); + list_add_tail(&erp_action->list, &adapter->erp_ready_head); up(&adapter->erp_ready_sem); retval = 0; out: -- cgit v1.2.3