diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-11-14 12:59:32 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-11-14 12:59:32 -0800 |
commit | 0e70613b1c26bf23909d0e778de52d0cc6d88835 (patch) | |
tree | 2978295a4805983afe16dbf1372a4ff679026d2d | |
parent | e0a2af1e60ae89b18ef3afbf655f096564751045 (diff) | |
parent | c148ffdcda00b6599b70f8b65e6a1fadd1dbb127 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
md/raid5: Allow dirty-degraded arrays to be assembled when only party is degraded.
Don't unconditionally set in_sync on newly added device in raid5_reshape
md: allow v0.91 metadata to record devices as being active but not in-sync.
md: factor out updating of 'recovery_offset'.
-rw-r--r-- | drivers/md/md.c | 41 | ||||
-rw-r--r-- | drivers/md/raid5.c | 85 |
2 files changed, 112 insertions, 14 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index e64c971038d..b182f86a19d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) desc->raid_disk < mddev->raid_disks */) { set_bit(In_sync, &rdev->flags); rdev->raid_disk = desc->raid_disk; + } else if (desc->state & (1<<MD_DISK_ACTIVE)) { + /* active but not in sync implies recovery up to + * reshape position. We don't know exactly where + * that is, so set to zero for now */ + if (mddev->minor_version >= 91) { + rdev->recovery_offset = 0; + rdev->raid_disk = desc->raid_disk; + } } if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) set_bit(WriteMostly, &rdev->flags); @@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) list_for_each_entry(rdev2, &mddev->disks, same_set) { mdp_disk_t *d; int desc_nr; - if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) - && !test_bit(Faulty, &rdev2->flags)) + int is_active = test_bit(In_sync, &rdev2->flags); + + if (rdev2->raid_disk >= 0 && + sb->minor_version >= 91) + /* we have nowhere to store the recovery_offset, + * but if it is not below the reshape_position, + * we can piggy-back on that. + */ + is_active = 1; + if (rdev2->raid_disk < 0 || + test_bit(Faulty, &rdev2->flags)) + is_active = 0; + if (is_active) desc_nr = rdev2->raid_disk; else desc_nr = next_spare++; @@ -1043,16 +1062,16 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) d->number = rdev2->desc_nr; d->major = MAJOR(rdev2->bdev->bd_dev); d->minor = MINOR(rdev2->bdev->bd_dev); - if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) - && !test_bit(Faulty, &rdev2->flags)) + if (is_active) d->raid_disk = rdev2->raid_disk; else d->raid_disk = rdev2->desc_nr; /* compatibility */ if (test_bit(Faulty, &rdev2->flags)) d->state = (1<<MD_DISK_FAULTY); - else if (test_bit(In_sync, &rdev2->flags)) { + else if (is_active) { d->state = (1<<MD_DISK_ACTIVE); - d->state |= (1<<MD_DISK_SYNC); + if (test_bit(In_sync, &rdev2->flags)) + d->state |= (1<<MD_DISK_SYNC); active++; working++; } else { @@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags)) { - if (mddev->curr_resync_completed > rdev->recovery_offset) - rdev->recovery_offset = mddev->curr_resync_completed; if (rdev->recovery_offset > 0) { sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); @@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares) */ mdk_rdev_t *rdev; + /* First make sure individual recovery_offsets are correct */ + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk >= 0 && + !test_bit(In_sync, &rdev->flags) && + mddev->curr_resync_completed > rdev->recovery_offset) + rdev->recovery_offset = mddev->curr_resync_completed; + + } list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->sb_events == mddev->events || (nospares && diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dcce204b6c7..d29215d966d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) return ERR_PTR(-ENOMEM); } + +static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded) +{ + switch (algo) { + case ALGORITHM_PARITY_0: + if (raid_disk < max_degraded) + return 1; + break; + case ALGORITHM_PARITY_N: + if (raid_disk >= raid_disks - max_degraded) + return 1; + break; + case ALGORITHM_PARITY_0_6: + if (raid_disk == 0 || + raid_disk == raid_disks - 1) + return 1; + break; + case ALGORITHM_LEFT_ASYMMETRIC_6: + case ALGORITHM_RIGHT_ASYMMETRIC_6: + case ALGORITHM_LEFT_SYMMETRIC_6: + case ALGORITHM_RIGHT_SYMMETRIC_6: + if (raid_disk == raid_disks - 1) + return 1; + } + return 0; +} + static int run(mddev_t *mddev) { raid5_conf_t *conf; int working_disks = 0, chunk_size; + int dirty_parity_disks = 0; mdk_rdev_t *rdev; + sector_t reshape_offset = 0; if (mddev->recovery_cp != MaxSector) printk(KERN_NOTICE "raid5: %s is not clean" @@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev) "on a stripe boundary\n"); return -EINVAL; } + reshape_offset = here_new * mddev->new_chunk_sectors; /* here_new is the stripe we will write to */ here_old = mddev->reshape_position; sector_div(here_old, mddev->chunk_sectors * @@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev) /* * 0 for a fully functional array, 1 or 2 for a degraded array. */ - list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0 && - test_bit(In_sync, &rdev->flags)) + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk < 0) + continue; + if (test_bit(In_sync, &rdev->flags)) working_disks++; + /* This disc is not fully in-sync. However if it + * just stored parity (beyond the recovery_offset), + * when we don't need to be concerned about the + * array being dirty. + * When reshape goes 'backwards', we never have + * partially completed devices, so we only need + * to worry about reshape going forwards. + */ + /* Hack because v0.91 doesn't store recovery_offset properly. */ + if (mddev->major_version == 0 && + mddev->minor_version > 90) + rdev->recovery_offset = reshape_offset; + + printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n", + rdev->raid_disk, working_disks, conf->prev_algo, + conf->previous_raid_disks, conf->max_degraded, + conf->algorithm, conf->raid_disks, + only_parity(rdev->raid_disk, + conf->prev_algo, + conf->previous_raid_disks, + conf->max_degraded), + only_parity(rdev->raid_disk, + conf->algorithm, + conf->raid_disks, + conf->max_degraded)); + if (rdev->recovery_offset < reshape_offset) { + /* We need to check old and new layout */ + if (!only_parity(rdev->raid_disk, + conf->algorithm, + conf->raid_disks, + conf->max_degraded)) + continue; + } + if (!only_parity(rdev->raid_disk, + conf->prev_algo, + conf->previous_raid_disks, + conf->max_degraded)) + continue; + dirty_parity_disks++; + } mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) - working_disks); @@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev) mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); mddev->resync_max_sectors = mddev->dev_sectors; - if (mddev->degraded > 0 && + if (mddev->degraded > dirty_parity_disks && mddev->recovery_cp != MaxSector) { if (mddev->ok_start_degraded) printk(KERN_WARNING @@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev) !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { char nm[20]; - set_bit(In_sync, &rdev->flags); + if (rdev->raid_disk >= conf->previous_raid_disks) + set_bit(In_sync, &rdev->flags); + else + rdev->recovery_offset = 0; added_devices++; - rdev->recovery_offset = 0; sprintf(nm, "rd%d", rdev->raid_disk); if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) |