From c04be0aa82ff535e3676ab3e573957bdeef41879 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 3 Oct 2006 01:15:53 -0700 Subject: [PATCH] md: Improve locking around error handling The error handling routines don't use proper locking, and so two concurrent errors could trigger a problem. So: - use test-and-set and test-and-clear to synchonise the In_sync bits with the ->degraded count - use the spinlock to protect updates to the degraded count (could use an atomic_t but that would be a bigger change in code, and isn't really justified) - remove un-necessary locking in raid5 Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid10.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid10.c') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 233a4faede9..64f8016ab74 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * really dead" tests... */ return; - if (test_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; + spin_unlock_irqrestore(&conf->device_lock, flags); /* * if recovery is running, make sure it aborts. */ set_bit(MD_RECOVERY_ERR, &mddev->recovery); } - clear_bit(In_sync, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" @@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t *mddev) tmp = conf->mirrors + i; if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) - && !test_bit(In_sync, &tmp->rdev->flags)) { + && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded--; - set_bit(In_sync, &tmp->rdev->flags); + spin_unlock_irqrestore(&conf->device_lock, flags); } } -- cgit v1.2.3