diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 15 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 169 | ||||
-rw-r--r-- | drivers/md/dm.c | 4 | ||||
-rw-r--r-- | drivers/md/md.c | 71 | ||||
-rw-r--r-- | drivers/md/raid1.c | 3 | ||||
-rw-r--r-- | drivers/md/raid10.c | 3 |
6 files changed, 160 insertions, 105 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 64bf3a81db9..531d4d17d01 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -2,19 +2,17 @@ # Block device driver configuration # -if BLOCK - -menu "Multi-device support (RAID and LVM)" - -config MD +menuconfig MD bool "Multiple devices driver support (RAID and LVM)" + depends on BLOCK help Support multiple physical spindles through a single logical device. Required for RAID and logical volume management. +if MD + config BLK_DEV_MD tristate "RAID support" - depends on MD ---help--- This driver lets you combine several hard disk partitions into one logical block device. This can be used to simply append one @@ -191,7 +189,6 @@ config MD_FAULTY config BLK_DEV_DM tristate "Device mapper support" - depends on MD ---help--- Device-mapper is a low level volume manager. It works by allowing people to specify mappings for ranges of logical sectors. Various @@ -279,6 +276,4 @@ config DM_DELAY If unsure, say N. -endmenu - -endif +endif # MD diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 9620d452d03..927cb34c480 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -268,6 +268,31 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) if (page->index == bitmap->file_pages-1) size = roundup(bitmap->last_page_size, bdev_hardsect_size(rdev->bdev)); + /* Just make sure we aren't corrupting data or + * metadata + */ + if (bitmap->offset < 0) { + /* DATA BITMAP METADATA */ + if (bitmap->offset + + page->index * (PAGE_SIZE/512) + + size/512 > 0) + /* bitmap runs in to metadata */ + return -EINVAL; + if (rdev->data_offset + mddev->size*2 + > rdev->sb_offset*2 + bitmap->offset) + /* data runs in to bitmap */ + return -EINVAL; + } else if (rdev->sb_offset*2 < rdev->data_offset) { + /* METADATA BITMAP DATA */ + if (rdev->sb_offset*2 + + bitmap->offset + + page->index*(PAGE_SIZE/512) + size/512 + > rdev->data_offset) + /* bitmap runs in to data */ + return -EINVAL; + } else { 
+ /* DATA METADATA BITMAP - no problems */ + } md_super_write(mddev, rdev, (rdev->sb_offset<<1) + bitmap->offset + page->index * (PAGE_SIZE/512), @@ -280,32 +305,38 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) return 0; } +static void bitmap_file_kick(struct bitmap *bitmap); /* * write out a page to a file */ -static int write_page(struct bitmap *bitmap, struct page *page, int wait) +static void write_page(struct bitmap *bitmap, struct page *page, int wait) { struct buffer_head *bh; - if (bitmap->file == NULL) - return write_sb_page(bitmap, page, wait); + if (bitmap->file == NULL) { + switch (write_sb_page(bitmap, page, wait)) { + case -EINVAL: + bitmap->flags |= BITMAP_WRITE_ERROR; + } + } else { - bh = page_buffers(page); + bh = page_buffers(page); - while (bh && bh->b_blocknr) { - atomic_inc(&bitmap->pending_writes); - set_buffer_locked(bh); - set_buffer_mapped(bh); - submit_bh(WRITE, bh); - bh = bh->b_this_page; - } + while (bh && bh->b_blocknr) { + atomic_inc(&bitmap->pending_writes); + set_buffer_locked(bh); + set_buffer_mapped(bh); + submit_bh(WRITE, bh); + bh = bh->b_this_page; + } - if (wait) { - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - return (bitmap->flags & BITMAP_WRITE_ERROR) ? 
-EIO : 0; + if (wait) { + wait_event(bitmap->write_wait, + atomic_read(&bitmap->pending_writes)==0); + } } - return 0; + if (bitmap->flags & BITMAP_WRITE_ERROR) + bitmap_file_kick(bitmap); } static void end_bitmap_write(struct buffer_head *bh, int uptodate) @@ -425,17 +456,17 @@ out: */ /* update the event counter and sync the superblock to disk */ -int bitmap_update_sb(struct bitmap *bitmap) +void bitmap_update_sb(struct bitmap *bitmap) { bitmap_super_t *sb; unsigned long flags; if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ - return 0; + return; spin_lock_irqsave(&bitmap->lock, flags); if (!bitmap->sb_page) { /* no superblock */ spin_unlock_irqrestore(&bitmap->lock, flags); - return 0; + return; } spin_unlock_irqrestore(&bitmap->lock, flags); sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); @@ -443,7 +474,7 @@ int bitmap_update_sb(struct bitmap *bitmap) if (!bitmap->mddev->degraded) sb->events_cleared = cpu_to_le64(bitmap->mddev->events); kunmap_atomic(sb, KM_USER0); - return write_page(bitmap, bitmap->sb_page, 1); + write_page(bitmap, bitmap->sb_page, 1); } /* print out the bitmap file superblock */ @@ -572,20 +603,22 @@ enum bitmap_mask_op { MASK_UNSET }; -/* record the state of the bitmap in the superblock */ -static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, - enum bitmap_mask_op op) +/* record the state of the bitmap in the superblock. 
Return the old value */ +static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, + enum bitmap_mask_op op) { bitmap_super_t *sb; unsigned long flags; + int old; spin_lock_irqsave(&bitmap->lock, flags); if (!bitmap->sb_page) { /* can't set the state */ spin_unlock_irqrestore(&bitmap->lock, flags); - return; + return 0; } spin_unlock_irqrestore(&bitmap->lock, flags); sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); + old = le32_to_cpu(sb->state) & bits; switch (op) { case MASK_SET: sb->state |= cpu_to_le32(bits); break; @@ -594,6 +627,7 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, default: BUG(); } kunmap_atomic(sb, KM_USER0); + return old; } /* @@ -687,18 +721,23 @@ static void bitmap_file_kick(struct bitmap *bitmap) { char *path, *ptr = NULL; - bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET); - bitmap_update_sb(bitmap); + if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { + bitmap_update_sb(bitmap); - if (bitmap->file) { - path = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (path) - ptr = file_path(bitmap->file, path, PAGE_SIZE); + if (bitmap->file) { + path = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (path) + ptr = file_path(bitmap->file, path, PAGE_SIZE); - printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n", - bmname(bitmap), ptr ? ptr : ""); + printk(KERN_ALERT + "%s: kicking failed bitmap file %s from array!\n", + bmname(bitmap), ptr ? 
ptr : ""); - kfree(path); + kfree(path); + } else + printk(KERN_ALERT + "%s: disabling internal bitmap due to errors\n", + bmname(bitmap)); } bitmap_file_put(bitmap); @@ -769,16 +808,15 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ -int bitmap_unplug(struct bitmap *bitmap) +void bitmap_unplug(struct bitmap *bitmap) { unsigned long i, flags; int dirty, need_write; struct page *page; int wait = 0; - int err; if (!bitmap) - return 0; + return; /* look at each page to see if there are any set bits that need to be * flushed out to disk */ @@ -786,7 +824,7 @@ int bitmap_unplug(struct bitmap *bitmap) spin_lock_irqsave(&bitmap->lock, flags); if (!bitmap->filemap) { spin_unlock_irqrestore(&bitmap->lock, flags); - return 0; + return; } page = bitmap->filemap[i]; dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); @@ -798,7 +836,7 @@ int bitmap_unplug(struct bitmap *bitmap) spin_unlock_irqrestore(&bitmap->lock, flags); if (dirty | need_write) - err = write_page(bitmap, page, 0); + write_page(bitmap, page, 0); } if (wait) { /* if any writes were performed, we need to wait on them */ if (bitmap->file) @@ -809,7 +847,6 @@ int bitmap_unplug(struct bitmap *bitmap) } if (bitmap->flags & BITMAP_WRITE_ERROR) bitmap_file_kick(bitmap); - return 0; } static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); @@ -858,21 +895,21 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) bmname(bitmap), (unsigned long) i_size_read(file->f_mapping->host), bytes + sizeof(bitmap_super_t)); - goto out; + goto err; } ret = -ENOMEM; bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); if (!bitmap->filemap) - goto out; + goto err; /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned 
long) */ bitmap->filemap_attr = kzalloc( roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), GFP_KERNEL); if (!bitmap->filemap_attr) - goto out; + goto err; oldindex = ~0L; @@ -905,7 +942,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) } if (IS_ERR(page)) { /* read error */ ret = PTR_ERR(page); - goto out; + goto err; } oldindex = index; @@ -920,11 +957,13 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) memset(paddr + offset, 0xff, PAGE_SIZE - offset); kunmap_atomic(paddr, KM_USER0); - ret = write_page(bitmap, page, 1); - if (ret) { + write_page(bitmap, page, 1); + + ret = -EIO; + if (bitmap->flags & BITMAP_WRITE_ERROR) { /* release, page not in filemap yet */ put_page(page); - goto out; + goto err; } } @@ -956,11 +995,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) md_wakeup_thread(bitmap->mddev->thread); } -out: printk(KERN_INFO "%s: bitmap initialized from disk: " - "read %lu/%lu pages, set %lu bits, status: %d\n", - bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, ret); + "read %lu/%lu pages, set %lu bits\n", + bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt); + + return 0; + err: + printk(KERN_INFO "%s: bitmap initialisation failed: %d\n", + bmname(bitmap), ret); return ret; } @@ -997,19 +1040,18 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, * out to disk */ -int bitmap_daemon_work(struct bitmap *bitmap) +void bitmap_daemon_work(struct bitmap *bitmap) { unsigned long j; unsigned long flags; struct page *page = NULL, *lastpage = NULL; - int err = 0; int blocks; void *paddr; if (bitmap == NULL) - return 0; + return; if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) - return 0; + return; bitmap->daemon_lastrun = jiffies; for (j = 0; j < bitmap->chunks; j++) { @@ -1032,14 +1074,8 @@ int bitmap_daemon_work(struct bitmap *bitmap) clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 
spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) { - switch (write_page(bitmap, page, 0)) { - case 0: - break; - default: - bitmap_file_kick(bitmap); - } - } + if (need_write) + write_page(bitmap, page, 0); continue; } @@ -1048,13 +1084,11 @@ int bitmap_daemon_work(struct bitmap *bitmap) if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - err = write_page(bitmap, lastpage, 0); + write_page(bitmap, lastpage, 0); } else { set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); } - if (err) - bitmap_file_kick(bitmap); } else spin_unlock_irqrestore(&bitmap->lock, flags); lastpage = page; @@ -1097,14 +1131,13 @@ int bitmap_daemon_work(struct bitmap *bitmap) if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - err = write_page(bitmap, lastpage, 0); + write_page(bitmap, lastpage, 0); } else { set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); } } - return err; } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1517,7 +1550,9 @@ int bitmap_create(mddev_t *mddev) mddev->thread->timeout = bitmap->daemon_sleep * HZ; - return bitmap_update_sb(bitmap); + bitmap_update_sb(bitmap); + + return (bitmap->flags & BITMAP_WRITE_ERROR) ? 
-EIO : 0; error: bitmap_free(bitmap); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f4f7d35561a..846614e676c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -161,9 +161,7 @@ static void local_exit(void) { kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); - - if (unregister_blkdev(_major, _name) < 0) - DMERR("unregister_blkdev failed"); + unregister_blkdev(_major, _name); _major = 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index 33beaa7da08..65ddc887dfd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1640,7 +1640,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) static void md_update_sb(mddev_t * mddev, int force_change) { - int err; struct list_head *tmp; mdk_rdev_t *rdev; int sync_req; @@ -1727,7 +1726,7 @@ repeat: "md: updating %s RAID superblock on device (in sync %d)\n", mdname(mddev),mddev->in_sync); - err = bitmap_update_sb(mddev->bitmap); + bitmap_update_sb(mddev->bitmap); ITERATE_RDEV(mddev,rdev,tmp) { char b[BDEVNAME_SIZE]; dprintk(KERN_INFO "md: "); @@ -2073,9 +2072,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi err = super_types[super_format]. load_super(rdev, NULL, super_minor); if (err == -EINVAL) { - printk(KERN_WARNING - "md: %s has invalid sb, not importing!\n", - bdevname(rdev->bdev,b)); + printk(KERN_WARNING + "md: %s does not have a valid v%d.%d " + "superblock, not importing!\n", + bdevname(rdev->bdev,b), + super_format, super_minor); goto abort_free; } if (err < 0) { @@ -3174,13 +3175,33 @@ static int do_md_run(mddev_t * mddev) * Drop all container device buffers, from now on * the only valid external interface is through the md * device. - * Also find largest hardsector size */ ITERATE_RDEV(mddev,rdev,tmp) { if (test_bit(Faulty, &rdev->flags)) continue; sync_blockdev(rdev->bdev); invalidate_bdev(rdev->bdev); + + /* perform some consistency tests on the device. 
+ * We don't want the data to overlap the metadata, + * Internal Bitmap issues are handled elsewhere. + */ + if (rdev->data_offset < rdev->sb_offset) { + if (mddev->size && + rdev->data_offset + mddev->size*2 + > rdev->sb_offset*2) { + printk("md: %s: data overlaps metadata\n", + mdname(mddev)); + return -EINVAL; + } + } else { + if (rdev->sb_offset*2 + rdev->sb_size/512 + > rdev->data_offset) { + printk("md: %s: metadata overlaps data\n", + mdname(mddev)); + return -EINVAL; + } + } } md_probe(mddev->unit, NULL, NULL); @@ -4642,7 +4663,6 @@ static int md_thread(void * arg) * many dirty RAID5 blocks. */ - current->flags |= PF_NOFREEZE; allow_signal(SIGKILL); while (!kthread_should_stop()) { @@ -5090,7 +5110,7 @@ static int is_mddev_idle(mddev_t *mddev) mdk_rdev_t * rdev; struct list_head *tmp; int idle; - unsigned long curr_events; + long curr_events; idle = 1; ITERATE_RDEV(mddev,rdev,tmp) { @@ -5098,20 +5118,29 @@ static int is_mddev_idle(mddev_t *mddev) curr_events = disk_stat_read(disk, sectors[0]) + disk_stat_read(disk, sectors[1]) - atomic_read(&disk->sync_io); - /* The difference between curr_events and last_events - * will be affected by any new non-sync IO (making - * curr_events bigger) and any difference in the amount of - * in-flight syncio (making current_events bigger or smaller) - * The amount in-flight is currently limited to - * 32*64K in raid1/10 and 256*PAGE_SIZE in raid5/6 - * which is at most 4096 sectors. - * These numbers are fairly fragile and should be made - * more robust, probably by enforcing the - * 'window size' that md_do_sync sort-of uses. + /* sync IO will cause sync_io to increase before the disk_stats + * as sync_io is counted when a request starts, and + * disk_stats is counted when it completes. + * So resync activity will cause curr_events to be smaller than + * when there was no such activity. 
+ * non-sync IO will cause disk_stat to increase without + * increasing sync_io so curr_events will (eventually) + * be larger than it was before. Once it becomes + * substantially larger, the test below will cause + * the array to appear non-idle, and resync will slow + * down. + * If there is a lot of outstanding resync activity when + * we set last_event to curr_events, then all that activity + * completing might cause the array to appear non-idle + * and resync will be slowed down even though there might + * not have been non-resync activity. This will only + * happen once though. 'last_events' will soon reflect + * the state where there is little or no outstanding + * resync requests, and further resync activity will + * always make curr_events less than last_events. * - * Note: the following is an unsigned comparison. */ - if ((long)curr_events - (long)rdev->last_events > 4096) { + if (curr_events - rdev->last_events > 4096) { rdev->last_events = curr_events; idle = 0; } @@ -5772,7 +5801,7 @@ static void autostart_arrays(int part) for (i = 0; i < dev_cnt; i++) { dev_t dev = detected_devices[i]; - rdev = md_import_device(dev,0, 0); + rdev = md_import_device(dev,0, 90); if (IS_ERR(rdev)) continue; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 46677d7d998..00c78b77b13 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1526,8 +1526,7 @@ static void raid1d(mddev_t *mddev) blk_remove_plug(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - if (bitmap_unplug(mddev->bitmap) != 0) - printk("%s: bitmap file write failed!\n", mdname(mddev)); + bitmap_unplug(mddev->bitmap); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9eb66c1b523..a95ada1cfac 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1510,8 +1510,7 @@ static void raid10d(mddev_t *mddev) 
blk_remove_plug(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); /* flush any pending bitmap writes to disk before proceeding w/ I/O */ - if (bitmap_unplug(mddev->bitmap) != 0) - printk("%s: bitmap file write failed!\n", mdname(mddev)); + bitmap_unplug(mddev->bitmap); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; |