aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c22
-rw-r--r--drivers/md/dm-mpath.c8
-rw-r--r--drivers/md/dm-raid1.c4
-rw-r--r--drivers/md/dm-snap.c32
-rw-r--r--drivers/md/dm-snap.h2
-rw-r--r--drivers/md/dm-stripe.c4
-rw-r--r--drivers/md/dm-table.c2
-rw-r--r--drivers/md/dm.c34
-rw-r--r--drivers/md/linear.c2
-rw-r--r--drivers/md/md.c57
-rw-r--r--drivers/md/raid10.c2
11 files changed, 114 insertions, 55 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index ac89a5deaca..ab7c8e4a61f 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -208,16 +208,19 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
*/
/* IO operations when bitmap is stored near all superblocks */
-static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
+static struct page *read_sb_page(mddev_t *mddev, long offset,
+ struct page *page,
+ unsigned long index, int size)
{
/* choose a good rdev and read the page from there */
mdk_rdev_t *rdev;
struct list_head *tmp;
- struct page *page = alloc_page(GFP_KERNEL);
sector_t target;
if (!page)
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
return ERR_PTR(-ENOMEM);
rdev_for_each(rdev, tmp, mddev) {
@@ -227,7 +230,9 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
- if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
+ if (sync_page_io(rdev->bdev, target,
+ roundup(size, bdev_hardsect_size(rdev->bdev)),
+ page, READ)) {
page->index = index;
attach_page_buffers(page, NULL); /* so that free_buffer will
* quietly no-op */
@@ -544,7 +549,9 @@ static int bitmap_read_sb(struct bitmap *bitmap)
bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
} else {
- bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
+ bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset,
+ NULL,
+ 0, sizeof(bitmap_super_t));
}
if (IS_ERR(bitmap->sb_page)) {
err = PTR_ERR(bitmap->sb_page);
@@ -957,11 +964,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
*/
page = bitmap->sb_page;
offset = sizeof(bitmap_super_t);
+ read_sb_page(bitmap->mddev, bitmap->offset,
+ page,
+ index, count);
} else if (file) {
page = read_page(file, index, bitmap, count);
offset = 0;
} else {
- page = read_sb_page(bitmap->mddev, bitmap->offset, index);
+ page = read_sb_page(bitmap->mddev, bitmap->offset,
+ NULL,
+ index, count);
offset = 0;
}
if (IS_ERR(page)) { /* read error */
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 4840733cd90..3d7f4923cd1 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -441,13 +441,13 @@ static void process_queued_ios(struct work_struct *work)
__choose_pgpath(m);
pgpath = m->current_pgpath;
- m->pgpath_to_activate = m->current_pgpath;
if ((pgpath && !m->queue_io) ||
(!pgpath && !m->queue_if_no_path))
must_queue = 0;
- if (m->pg_init_required && !m->pg_init_in_progress) {
+ if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
+ m->pgpath_to_activate = pgpath;
m->pg_init_count++;
m->pg_init_required = 0;
m->pg_init_in_progress = 1;
@@ -708,6 +708,10 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
m->hw_handler_name = NULL;
return -EINVAL;
}
+
+ if (hw_argc > 1)
+ DMWARN("Ignoring user-specified arguments for "
+ "hardware handler \"%s\"", m->hw_handler_name);
consume(as, hw_argc - 1);
return 0;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 92dcc06832a..ec43f9fa4b2 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -656,9 +656,10 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
return;
if (!ms->log_failure) {
- while ((bio = bio_list_pop(failures)))
+ while ((bio = bio_list_pop(failures))) {
ms->in_sync = 0;
dm_rh_mark_nosync(ms->rh, bio, bio->bi_size, 0);
+ }
return;
}
@@ -1031,6 +1032,7 @@ static void mirror_dtr(struct dm_target *ti)
del_timer_sync(&ms->timer);
flush_workqueue(ms->kmirrord_wq);
+ flush_scheduled_work();
dm_kcopyd_client_destroy(ms->kcopyd_client);
destroy_workqueue(ms->kmirrord_wq);
free_context(ms, ti, ms->nr_mirrors);
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index b2d9d1ac28a..6c96db26b87 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -229,19 +229,21 @@ static void __insert_origin(struct origin *o)
*/
static int register_snapshot(struct dm_snapshot *snap)
{
- struct origin *o;
+ struct origin *o, *new_o;
struct block_device *bdev = snap->origin->bdev;
+ new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
+ if (!new_o)
+ return -ENOMEM;
+
down_write(&_origins_lock);
o = __lookup_origin(bdev);
- if (!o) {
+ if (o)
+ kfree(new_o);
+ else {
/* New origin */
- o = kmalloc(sizeof(*o), GFP_KERNEL);
- if (!o) {
- up_write(&_origins_lock);
- return -ENOMEM;
- }
+ o = new_o;
/* Initialise the struct */
INIT_LIST_HEAD(&o->snapshots);
@@ -368,6 +370,7 @@ static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snaps
struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
GFP_NOIO);
+ atomic_inc(&s->pending_exceptions_count);
pe->snap = s;
return pe;
@@ -375,7 +378,11 @@ static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snaps
static void free_pending_exception(struct dm_snap_pending_exception *pe)
{
- mempool_free(pe, pe->snap->pending_pool);
+ struct dm_snapshot *s = pe->snap;
+
+ mempool_free(pe, s->pending_pool);
+ smp_mb__before_atomic_dec();
+ atomic_dec(&s->pending_exceptions_count);
}
static void insert_completed_exception(struct dm_snapshot *s,
@@ -600,6 +607,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->valid = 1;
s->active = 0;
+ atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
spin_lock_init(&s->pe_lock);
s->ti = ti;
@@ -726,6 +734,14 @@ static void snapshot_dtr(struct dm_target *ti)
/* After this returns there can be no new kcopyd jobs. */
unregister_snapshot(s);
+ while (atomic_read(&s->pending_exceptions_count))
+ yield();
+ /*
+ * Ensure instructions in mempool_destroy aren't reordered
+ * before atomic_read.
+ */
+ smp_mb();
+
#ifdef CONFIG_DM_DEBUG
for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
index f07315fe236..99c0106ede2 100644
--- a/drivers/md/dm-snap.h
+++ b/drivers/md/dm-snap.h
@@ -160,6 +160,8 @@ struct dm_snapshot {
mempool_t *pending_pool;
+ atomic_t pending_exceptions_count;
+
struct exception_table pending;
struct exception_table complete;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index a2d068dbe9e..9e4ef88d421 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -320,8 +320,10 @@ int __init dm_stripe_init(void)
int r;
r = dm_register_target(&stripe_target);
- if (r < 0)
+ if (r < 0) {
DMWARN("target registration failed");
+ return r;
+ }
kstriped = create_singlethread_workqueue("kstriped");
if (!kstriped) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index a63161aec48..04e5fd742c2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -668,7 +668,7 @@ static void check_for_valid_limits(struct io_restrictions *rs)
if (!rs->max_segment_size)
rs->max_segment_size = MAX_SEGMENT_SIZE;
if (!rs->seg_boundary_mask)
- rs->seg_boundary_mask = -1;
+ rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
if (!rs->bounce_pfn)
rs->bounce_pfn = -1;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6963ad14840..c99e4728ff4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -375,7 +375,7 @@ static void start_io_acct(struct dm_io *io)
dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
}
-static int end_io_acct(struct dm_io *io)
+static void end_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
struct bio *bio = io->bio;
@@ -391,7 +391,9 @@ static int end_io_acct(struct dm_io *io)
dm_disk(md)->part0.in_flight = pending =
atomic_dec_return(&md->pending);
- return !pending;
+ /* nudge anyone waiting on suspend queue */
+ if (!pending)
+ wake_up(&md->wait);
}
/*
@@ -499,9 +501,7 @@ static void dec_pending(struct dm_io *io, int error)
spin_unlock_irqrestore(&io->md->pushback_lock, flags);
}
- if (end_io_acct(io))
- /* nudge anyone waiting on suspend queue */
- wake_up(&io->md->wait);
+ end_io_acct(io);
if (io->error != DM_ENDIO_REQUEUE) {
blk_add_trace_bio(io->md->queue, io->bio,
@@ -937,16 +937,24 @@ static void dm_unplug_all(struct request_queue *q)
static int dm_any_congested(void *congested_data, int bdi_bits)
{
- int r;
- struct mapped_device *md = (struct mapped_device *) congested_data;
- struct dm_table *map = dm_get_table(md);
+ int r = bdi_bits;
+ struct mapped_device *md = congested_data;
+ struct dm_table *map;
- if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
- r = bdi_bits;
- else
- r = dm_table_any_congested(map, bdi_bits);
+ atomic_inc(&md->pending);
+
+ if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+ map = dm_get_table(md);
+ if (map) {
+ r = dm_table_any_congested(map, bdi_bits);
+ dm_table_put(map);
+ }
+ }
+
+ if (!atomic_dec_return(&md->pending))
+ /* nudge anyone waiting on suspend queue */
+ wake_up(&md->wait);
- dm_table_put(map);
return r;
}
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 190147c79e7..3b90c5c924e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -148,6 +148,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
min_sectors = conf->array_sectors;
sector_div(min_sectors, PAGE_SIZE/sizeof(struct dev_info *));
+ if (min_sectors == 0)
+ min_sectors = 1;
/* min_sectors is the minimum spacing that will fit the hash
* table in one PAGE. This may be much smaller than needed.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c1a837ca193..1b1d32694f6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -222,6 +222,9 @@ static void mddev_put(mddev_t *mddev)
list_del(&mddev->all_mddevs);
spin_unlock(&all_mddevs_lock);
blk_cleanup_queue(mddev->queue);
+ if (mddev->sysfs_state)
+ sysfs_put(mddev->sysfs_state);
+ mddev->sysfs_state = NULL;
kobject_put(&mddev->kobj);
} else
spin_unlock(&all_mddevs_lock);
@@ -1459,6 +1462,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
kobject_del(&rdev->kobj);
goto fail;
}
+ rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
+
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
return 0;
@@ -1488,7 +1493,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
-
+ sysfs_put(rdev->sysfs_state);
+ rdev->sysfs_state = NULL;
/* We need to delay this, otherwise we can deadlock when
* writing to 'remove' to "dev/state". We also need
* to delay it due to rcu usage.
@@ -1923,8 +1929,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
err = 0;
}
- if (!err)
- sysfs_notify(&rdev->kobj, NULL, "state");
+ if (!err && rdev->sysfs_state)
+ sysfs_notify_dirent(rdev->sysfs_state);
return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
@@ -2019,7 +2025,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
rdev->raid_disk = -1;
return err;
} else
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
printk(KERN_WARNING
@@ -2036,7 +2042,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
clear_bit(Faulty, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
}
return len;
}
@@ -2770,7 +2776,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
if (err)
return err;
else {
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
return len;
}
}
@@ -3457,6 +3463,11 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
disk->fops = &md_fops;
disk->private_data = mddev;
disk->queue = mddev->queue;
+ /* Allow extended partitions. This makes the
+ * 'mdp' device redundant, but we can really
+ * remove it now.
+ */
+ disk->flags |= GENHD_FL_EXT_DEVT;
add_disk(disk);
mddev->gendisk = disk;
error = kobject_init_and_add(&mddev->kobj, &md_ktype,
@@ -3465,8 +3476,10 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
if (error)
printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
disk->disk_name);
- else
+ else {
kobject_uevent(&mddev->kobj, KOBJ_ADD);
+ mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
+ }
return NULL;
}
@@ -3477,7 +3490,7 @@ static void md_safemode_timeout(unsigned long data)
if (!atomic_read(&mddev->writes_pending)) {
mddev->safemode = 1;
if (mddev->external)
- set_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags);
+ sysfs_notify_dirent(mddev->sysfs_state);
}
md_wakeup_thread(mddev->thread);
}
@@ -3578,7 +3591,7 @@ static int do_md_run(mddev_t * mddev)
return -EINVAL;
}
}
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
}
md_probe(mddev->unit, NULL, NULL);
@@ -3740,7 +3753,7 @@ static int do_md_run(mddev_t * mddev)
mddev->changed = 1;
md_new_event(mddev);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
sysfs_notify(&mddev->kobj, NULL, "sync_action");
sysfs_notify(&mddev->kobj, NULL, "degraded");
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@ -3767,7 +3780,7 @@ static int restart_array(mddev_t *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
return 0;
}
@@ -3847,7 +3860,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
module_put(mddev->pers->owner);
mddev->pers = NULL;
/* tell userspace to handle 'inactive' */
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
set_capacity(disk, 0);
mddev->changed = 1;
@@ -3927,13 +3940,14 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->degraded = 0;
mddev->barriers_work = 0;
mddev->safemode = 0;
+ kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
mdname(mddev));
err = 0;
md_new_event(mddev);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
out:
return err;
}
@@ -4297,7 +4311,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
if (err)
export_rdev(rdev);
else
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
md_update_sb(mddev, 1);
if (mddev->degraded)
@@ -4938,7 +4952,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
if (mddev->ro == 2) {
mddev->ro = 0;
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
} else {
@@ -5612,7 +5626,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
spin_unlock_irq(&mddev->write_lock);
}
if (did_change)
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -5655,7 +5669,7 @@ int md_allow_write(mddev_t *mddev)
mddev->safemode = 1;
spin_unlock_irq(&mddev->write_lock);
md_update_sb(mddev, 0);
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
} else
spin_unlock_irq(&mddev->write_lock);
@@ -6048,9 +6062,6 @@ void md_check_recovery(mddev_t *mddev)
if (mddev->bitmap)
bitmap_daemon_work(mddev->bitmap);
- if (test_and_clear_bit(MD_NOTIFY_ARRAY_STATE, &mddev->flags))
- sysfs_notify(&mddev->kobj, NULL, "array_state");
-
if (mddev->ro)
return;
@@ -6103,7 +6114,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->safemode = 0;
spin_unlock_irq(&mddev->write_lock);
if (did_change)
- sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify_dirent(mddev->sysfs_state);
}
if (mddev->flags)
@@ -6111,7 +6122,7 @@ void md_check_recovery(mddev_t *mddev)
rdev_for_each(rdev, rtmp, mddev)
if (test_and_clear_bit(StateChanged, &rdev->flags))
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
@@ -6221,7 +6232,7 @@ void md_check_recovery(mddev_t *mddev)
void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
- sysfs_notify(&rdev->kobj, NULL, "state");
+ sysfs_notify_dirent(rdev->sysfs_state);
wait_event_timeout(rdev->blocked_wait,
!test_bit(Blocked, &rdev->flags),
msecs_to_jiffies(5000));
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index da5129a24b1..970a96ef9b1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1137,7 +1137,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
if (!enough(conf))
return -EINVAL;
- if (rdev->raid_disk)
+ if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
if (rdev->saved_raid_disk >= 0 &&