aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/Kconfig 15
-rw-r--r-- drivers/md/bitmap.c 169
-rw-r--r-- drivers/md/dm.c 4
-rw-r--r-- drivers/md/md.c 71
-rw-r--r-- drivers/md/raid1.c 3
-rw-r--r-- drivers/md/raid10.c 3
6 files changed, 160 insertions, 105 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 64bf3a81db9..531d4d17d01 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -2,19 +2,17 @@
# Block device driver configuration
#
-if BLOCK
-
-menu "Multi-device support (RAID and LVM)"
-
-config MD
+menuconfig MD
bool "Multiple devices driver support (RAID and LVM)"
+ depends on BLOCK
help
Support multiple physical spindles through a single logical device.
Required for RAID and logical volume management.
+if MD
+
config BLK_DEV_MD
tristate "RAID support"
- depends on MD
---help---
This driver lets you combine several hard disk partitions into one
logical block device. This can be used to simply append one
@@ -191,7 +189,6 @@ config MD_FAULTY
config BLK_DEV_DM
tristate "Device mapper support"
- depends on MD
---help---
Device-mapper is a low level volume manager. It works by allowing
people to specify mappings for ranges of logical sectors. Various
@@ -279,6 +276,4 @@ config DM_DELAY
If unsure, say N.
-endmenu
-
-endif
+endif # MD
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 9620d452d03..927cb34c480 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -268,6 +268,31 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
if (page->index == bitmap->file_pages-1)
size = roundup(bitmap->last_page_size,
bdev_hardsect_size(rdev->bdev));
+ /* Just make sure we aren't corrupting data or
+ * metadata
+ */
+ if (bitmap->offset < 0) {
+ /* DATA BITMAP METADATA */
+ if (bitmap->offset
+ + page->index * (PAGE_SIZE/512)
+ + size/512 > 0)
+ /* bitmap runs into metadata */
+ return -EINVAL;
+ if (rdev->data_offset + mddev->size*2
+ > rdev->sb_offset*2 + bitmap->offset)
+ /* data runs into bitmap */
+ return -EINVAL;
+ } else if (rdev->sb_offset*2 < rdev->data_offset) {
+ /* METADATA BITMAP DATA */
+ if (rdev->sb_offset*2
+ + bitmap->offset
+ + page->index*(PAGE_SIZE/512) + size/512
+ > rdev->data_offset)
+ /* bitmap runs into data */
+ return -EINVAL;
+ } else {
+ /* DATA METADATA BITMAP - no problems */
+ }
md_super_write(mddev, rdev,
(rdev->sb_offset<<1) + bitmap->offset
+ page->index * (PAGE_SIZE/512),
@@ -280,32 +305,38 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
return 0;
}
+static void bitmap_file_kick(struct bitmap *bitmap);
/*
* write out a page to a file
*/
-static int write_page(struct bitmap *bitmap, struct page *page, int wait)
+static void write_page(struct bitmap *bitmap, struct page *page, int wait)
{
struct buffer_head *bh;
- if (bitmap->file == NULL)
- return write_sb_page(bitmap, page, wait);
+ if (bitmap->file == NULL) {
+ switch (write_sb_page(bitmap, page, wait)) {
+ case -EINVAL:
+ bitmap->flags |= BITMAP_WRITE_ERROR;
+ }
+ } else {
- bh = page_buffers(page);
+ bh = page_buffers(page);
- while (bh && bh->b_blocknr) {
- atomic_inc(&bitmap->pending_writes);
- set_buffer_locked(bh);
- set_buffer_mapped(bh);
- submit_bh(WRITE, bh);
- bh = bh->b_this_page;
- }
+ while (bh && bh->b_blocknr) {
+ atomic_inc(&bitmap->pending_writes);
+ set_buffer_locked(bh);
+ set_buffer_mapped(bh);
+ submit_bh(WRITE, bh);
+ bh = bh->b_this_page;
+ }
- if (wait) {
- wait_event(bitmap->write_wait,
- atomic_read(&bitmap->pending_writes)==0);
- return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
+ if (wait) {
+ wait_event(bitmap->write_wait,
+ atomic_read(&bitmap->pending_writes)==0);
+ }
}
- return 0;
+ if (bitmap->flags & BITMAP_WRITE_ERROR)
+ bitmap_file_kick(bitmap);
}
static void end_bitmap_write(struct buffer_head *bh, int uptodate)
@@ -425,17 +456,17 @@ out:
*/
/* update the event counter and sync the superblock to disk */
-int bitmap_update_sb(struct bitmap *bitmap)
+void bitmap_update_sb(struct bitmap *bitmap)
{
bitmap_super_t *sb;
unsigned long flags;
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
- return 0;
+ return;
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->sb_page) { /* no superblock */
spin_unlock_irqrestore(&bitmap->lock, flags);
- return 0;
+ return;
}
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -443,7 +474,7 @@ int bitmap_update_sb(struct bitmap *bitmap)
if (!bitmap->mddev->degraded)
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
kunmap_atomic(sb, KM_USER0);
- return write_page(bitmap, bitmap->sb_page, 1);
+ write_page(bitmap, bitmap->sb_page, 1);
}
/* print out the bitmap file superblock */
@@ -572,20 +603,22 @@ enum bitmap_mask_op {
MASK_UNSET
};
-/* record the state of the bitmap in the superblock */
-static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
- enum bitmap_mask_op op)
+/* record the state of the bitmap in the superblock. Return the old value */
+static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
+ enum bitmap_mask_op op)
{
bitmap_super_t *sb;
unsigned long flags;
+ int old;
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->sb_page) { /* can't set the state */
spin_unlock_irqrestore(&bitmap->lock, flags);
- return;
+ return 0;
}
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
+ old = le32_to_cpu(sb->state) & bits;
switch (op) {
case MASK_SET: sb->state |= cpu_to_le32(bits);
break;
@@ -594,6 +627,7 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
default: BUG();
}
kunmap_atomic(sb, KM_USER0);
+ return old;
}
/*
@@ -687,18 +721,23 @@ static void bitmap_file_kick(struct bitmap *bitmap)
{
char *path, *ptr = NULL;
- bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
- bitmap_update_sb(bitmap);
+ if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) {
+ bitmap_update_sb(bitmap);
- if (bitmap->file) {
- path = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (path)
- ptr = file_path(bitmap->file, path, PAGE_SIZE);
+ if (bitmap->file) {
+ path = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (path)
+ ptr = file_path(bitmap->file, path, PAGE_SIZE);
- printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
- bmname(bitmap), ptr ? ptr : "");
+ printk(KERN_ALERT
+ "%s: kicking failed bitmap file %s from array!\n",
+ bmname(bitmap), ptr ? ptr : "");
- kfree(path);
+ kfree(path);
+ } else
+ printk(KERN_ALERT
+ "%s: disabling internal bitmap due to errors\n",
+ bmname(bitmap));
}
bitmap_file_put(bitmap);
@@ -769,16 +808,15 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
/* this gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
* sync the dirty pages of the bitmap file to disk */
-int bitmap_unplug(struct bitmap *bitmap)
+void bitmap_unplug(struct bitmap *bitmap)
{
unsigned long i, flags;
int dirty, need_write;
struct page *page;
int wait = 0;
- int err;
if (!bitmap)
- return 0;
+ return;
/* look at each page to see if there are any set bits that need to be
* flushed out to disk */
@@ -786,7 +824,7 @@ int bitmap_unplug(struct bitmap *bitmap)
spin_lock_irqsave(&bitmap->lock, flags);
if (!bitmap->filemap) {
spin_unlock_irqrestore(&bitmap->lock, flags);
- return 0;
+ return;
}
page = bitmap->filemap[i];
dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
@@ -798,7 +836,7 @@ int bitmap_unplug(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);
if (dirty | need_write)
- err = write_page(bitmap, page, 0);
+ write_page(bitmap, page, 0);
}
if (wait) { /* if any writes were performed, we need to wait on them */
if (bitmap->file)
@@ -809,7 +847,6 @@ int bitmap_unplug(struct bitmap *bitmap)
}
if (bitmap->flags & BITMAP_WRITE_ERROR)
bitmap_file_kick(bitmap);
- return 0;
}
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
@@ -858,21 +895,21 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
bmname(bitmap),
(unsigned long) i_size_read(file->f_mapping->host),
bytes + sizeof(bitmap_super_t));
- goto out;
+ goto err;
}
ret = -ENOMEM;
bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
if (!bitmap->filemap)
- goto out;
+ goto err;
/* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
bitmap->filemap_attr = kzalloc(
roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
GFP_KERNEL);
if (!bitmap->filemap_attr)
- goto out;
+ goto err;
oldindex = ~0L;
@@ -905,7 +942,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
}
if (IS_ERR(page)) { /* read error */
ret = PTR_ERR(page);
- goto out;
+ goto err;
}
oldindex = index;
@@ -920,11 +957,13 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
memset(paddr + offset, 0xff,
PAGE_SIZE - offset);
kunmap_atomic(paddr, KM_USER0);
- ret = write_page(bitmap, page, 1);
- if (ret) {
+ write_page(bitmap, page, 1);
+
+ ret = -EIO;
+ if (bitmap->flags & BITMAP_WRITE_ERROR) {
/* release, page not in filemap yet */
put_page(page);
- goto out;
+ goto err;
}
}
@@ -956,11 +995,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
md_wakeup_thread(bitmap->mddev->thread);
}
-out:
printk(KERN_INFO "%s: bitmap initialized from disk: "
- "read %lu/%lu pages, set %lu bits, status: %d\n",
- bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, ret);
+ "read %lu/%lu pages, set %lu bits\n",
+ bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt);
+
+ return 0;
+ err:
+ printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
+ bmname(bitmap), ret);
return ret;
}
@@ -997,19 +1040,18 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
* out to disk
*/
-int bitmap_daemon_work(struct bitmap *bitmap)
+void bitmap_daemon_work(struct bitmap *bitmap)
{
unsigned long j;
unsigned long flags;
struct page *page = NULL, *lastpage = NULL;
- int err = 0;
int blocks;
void *paddr;
if (bitmap == NULL)
- return 0;
+ return;
if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
- return 0;
+ return;
bitmap->daemon_lastrun = jiffies;
for (j = 0; j < bitmap->chunks; j++) {
@@ -1032,14 +1074,8 @@ int bitmap_daemon_work(struct bitmap *bitmap)
clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
- if (need_write) {
- switch (write_page(bitmap, page, 0)) {
- case 0:
- break;
- default:
- bitmap_file_kick(bitmap);
- }
- }
+ if (need_write)
+ write_page(bitmap, page, 0);
continue;
}
@@ -1048,13 +1084,11 @@ int bitmap_daemon_work(struct bitmap *bitmap)
if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
- err = write_page(bitmap, lastpage, 0);
+ write_page(bitmap, lastpage, 0);
} else {
set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
}
- if (err)
- bitmap_file_kick(bitmap);
} else
spin_unlock_irqrestore(&bitmap->lock, flags);
lastpage = page;
@@ -1097,14 +1131,13 @@ int bitmap_daemon_work(struct bitmap *bitmap)
if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
- err = write_page(bitmap, lastpage, 0);
+ write_page(bitmap, lastpage, 0);
} else {
set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
}
}
- return err;
}
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1517,7 +1550,9 @@ int bitmap_create(mddev_t *mddev)
mddev->thread->timeout = bitmap->daemon_sleep * HZ;
- return bitmap_update_sb(bitmap);
+ bitmap_update_sb(bitmap);
+
+ return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
error:
bitmap_free(bitmap);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f4f7d35561a..846614e676c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -161,9 +161,7 @@ static void local_exit(void)
{
kmem_cache_destroy(_tio_cache);
kmem_cache_destroy(_io_cache);
-
- if (unregister_blkdev(_major, _name) < 0)
- DMERR("unregister_blkdev failed");
+ unregister_blkdev(_major, _name);
_major = 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 33beaa7da08..65ddc887dfd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1640,7 +1640,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
static void md_update_sb(mddev_t * mddev, int force_change)
{
- int err;
struct list_head *tmp;
mdk_rdev_t *rdev;
int sync_req;
@@ -1727,7 +1726,7 @@ repeat:
"md: updating %s RAID superblock on device (in sync %d)\n",
mdname(mddev),mddev->in_sync);
- err = bitmap_update_sb(mddev->bitmap);
+ bitmap_update_sb(mddev->bitmap);
ITERATE_RDEV(mddev,rdev,tmp) {
char b[BDEVNAME_SIZE];
dprintk(KERN_INFO "md: ");
@@ -2073,9 +2072,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
err = super_types[super_format].
load_super(rdev, NULL, super_minor);
if (err == -EINVAL) {
- printk(KERN_WARNING
- "md: %s has invalid sb, not importing!\n",
- bdevname(rdev->bdev,b));
+ printk(KERN_WARNING
+ "md: %s does not have a valid v%d.%d "
+ "superblock, not importing!\n",
+ bdevname(rdev->bdev,b),
+ super_format, super_minor);
goto abort_free;
}
if (err < 0) {
@@ -3174,13 +3175,33 @@ static int do_md_run(mddev_t * mddev)
* Drop all container device buffers, from now on
* the only valid external interface is through the md
* device.
- * Also find largest hardsector size
*/
ITERATE_RDEV(mddev,rdev,tmp) {
if (test_bit(Faulty, &rdev->flags))
continue;
sync_blockdev(rdev->bdev);
invalidate_bdev(rdev->bdev);
+
+ /* perform some consistency tests on the device.
+ * We don't want the data to overlap the metadata,
+ * Internal Bitmap issues are handled elsewhere.
+ */
+ if (rdev->data_offset < rdev->sb_offset) {
+ if (mddev->size &&
+ rdev->data_offset + mddev->size*2
+ > rdev->sb_offset*2) {
+ printk("md: %s: data overlaps metadata\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
+ } else {
+ if (rdev->sb_offset*2 + rdev->sb_size/512
+ > rdev->data_offset) {
+ printk("md: %s: metadata overlaps data\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
+ }
}
md_probe(mddev->unit, NULL, NULL);
@@ -4642,7 +4663,6 @@ static int md_thread(void * arg)
* many dirty RAID5 blocks.
*/
- current->flags |= PF_NOFREEZE;
allow_signal(SIGKILL);
while (!kthread_should_stop()) {
@@ -5090,7 +5110,7 @@ static int is_mddev_idle(mddev_t *mddev)
mdk_rdev_t * rdev;
struct list_head *tmp;
int idle;
- unsigned long curr_events;
+ long curr_events;
idle = 1;
ITERATE_RDEV(mddev,rdev,tmp) {
@@ -5098,20 +5118,29 @@ static int is_mddev_idle(mddev_t *mddev)
curr_events = disk_stat_read(disk, sectors[0]) +
disk_stat_read(disk, sectors[1]) -
atomic_read(&disk->sync_io);
- /* The difference between curr_events and last_events
- * will be affected by any new non-sync IO (making
- * curr_events bigger) and any difference in the amount of
- * in-flight syncio (making current_events bigger or smaller)
- * The amount in-flight is currently limited to
- * 32*64K in raid1/10 and 256*PAGE_SIZE in raid5/6
- * which is at most 4096 sectors.
- * These numbers are fairly fragile and should be made
- * more robust, probably by enforcing the
- * 'window size' that md_do_sync sort-of uses.
+ /* sync IO will cause sync_io to increase before the disk_stats
+ * as sync_io is counted when a request starts, and
+ * disk_stats is counted when it completes.
+ * So resync activity will cause curr_events to be smaller than
+ * when there was no such activity.
+ * non-sync IO will cause disk_stat to increase without
+ * increasing sync_io so curr_events will (eventually)
+ * be larger than it was before. Once it becomes
+ * substantially larger, the test below will cause
+ * the array to appear non-idle, and resync will slow
+ * down.
+ * If there is a lot of outstanding resync activity when
+ * we set last_events to curr_events, then all that activity
+ * completing might cause the array to appear non-idle
+ * and resync will be slowed down even though there might
+ * not have been non-resync activity. This will only
+ * happen once though. 'last_events' will soon reflect
+ * the state where there are few or no outstanding
+ * resync requests, and further resync activity will
+ * always make curr_events less than last_events.
*
- * Note: the following is an unsigned comparison.
*/
- if ((long)curr_events - (long)rdev->last_events > 4096) {
+ if (curr_events - rdev->last_events > 4096) {
rdev->last_events = curr_events;
idle = 0;
}
@@ -5772,7 +5801,7 @@ static void autostart_arrays(int part)
for (i = 0; i < dev_cnt; i++) {
dev_t dev = detected_devices[i];
- rdev = md_import_device(dev,0, 0);
+ rdev = md_import_device(dev,0, 90);
if (IS_ERR(rdev))
continue;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 46677d7d998..00c78b77b13 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1526,8 +1526,7 @@ static void raid1d(mddev_t *mddev)
blk_remove_plug(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
- if (bitmap_unplug(mddev->bitmap) != 0)
- printk("%s: bitmap file write failed!\n", mdname(mddev));
+ bitmap_unplug(mddev->bitmap);
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9eb66c1b523..a95ada1cfac 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1510,8 +1510,7 @@ static void raid10d(mddev_t *mddev)
blk_remove_plug(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
- if (bitmap_unplug(mddev->bitmap) != 0)
- printk("%s: bitmap file write failed!\n", mdname(mddev));
+ bitmap_unplug(mddev->bitmap);
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;