From 83c0d5d5388a8d45f7a45e0ec34adc52a78c81ad Mon Sep 17 00:00:00 2001 From: "Moger, Babu" Date: Sat, 6 Mar 2010 02:29:45 +0000 Subject: dm mpath: pass struct pgpath to pg init done This patch removes some unnecessary argument casting. There is no functional change with this patch. Passes 'struct pgpath' through to pg_init_done() instead of the enclosed 'struct dm_path'. Tested the changes with LSI storage.. CC: Chandra Seetharaman Signed-off-by: Babu Moger Acked-by: Kiyoshi Ueda Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e81345a1d08..2c6bf74ad5c 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1128,8 +1128,7 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) static void pg_init_done(void *data, int errors) { - struct dm_path *path = data; - struct pgpath *pgpath = path_to_pgpath(path); + struct pgpath *pgpath = data; struct priority_group *pg = pgpath->pg; struct multipath *m = pg->m; unsigned long flags; @@ -1198,7 +1197,7 @@ static void activate_path(struct work_struct *work) container_of(work, struct pgpath, activate_path); scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), - pg_init_done, &pgpath->path); + pg_init_done, pgpath); } /* -- cgit v1.2.3 From f7b934c8127deebf4eb56fbe4a4ae0da16b6dbcd Mon Sep 17 00:00:00 2001 From: "Moger, Babu" Date: Sat, 6 Mar 2010 02:29:49 +0000 Subject: dm mpath: skip activate_path for failed paths This patch adds two minor fixes while processing device mapper path activation. Skip failed paths while calling activate_path. If the path is already failed then activate_path will fail for sure. We don't have to call in that case. In some case this might cause prolonged retries unnecessarily. Change the misleading message if the path being activated fails with SCSI_DH_NOSYS. Signed-off-by: Babu Moger Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2c6bf74ad5c..ecada419809 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -461,6 +461,9 @@ static void process_queued_ios(struct work_struct *work) m->pg_init_count++; m->pg_init_required = 0; list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) { + /* Skip failed paths */ + if (!tmp->is_active) + continue; if (queue_work(kmpath_handlerd, &tmp->activate_path)) m->pg_init_in_progress++; } @@ -1142,8 +1145,8 @@ static void pg_init_done(void *data, int errors) errors = 0; break; } - DMERR("Cannot failover device because scsi_dh_%s was not " - "loaded.", m->hw_handler_name); + DMERR("Could not failover the device: Handler scsi_dh_%s " + "Error %d.", m->hw_handler_name, errors); /* * Fail path for now, so we do not ping pong */ -- cgit v1.2.3 From ecdb2e257abc33ae6798d3ccba87bdafa40ef6b6 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:29:52 +0000 Subject: dm table: remove dm_get from dm_table_get_md Remove the dm_get() in dm_table_get_md() because dm_table_get_md() could be called from presuspend/postsuspend, which are called while mapped_device is in DMF_FREEING state, where dm_get() is not allowed. Justification for that is the lifetime of both objects: As far as the current dm design/implementation, mapped_device is never freed while targets are doing something, because dm core waits for targets to become quiet in dm_put() using presuspend/postsuspend. So targets should be able to touch mapped_device without holding reference count of the mapped_device, and we should allow targets to touch mapped_device even if it is in DMF_FREEING state. Backgrounds: I'm trying to remove the multipath internal queue, since dm core now has a generic queue for request-based dm. In the patch-set, the multipath target wants to request dm core to start/stop queue. One of such start/stop requests can happen during postsuspend() while the target waits for pg-init to complete, because the target stops queue when starting pg-init and tries to restart it when completing pg-init. Since queue belongs to mapped_device, it involves calling dm_table_get_md() and dm_put(). On the other hand, postsuspend() is called in dm_put() for mapped_device which is in DMF_FREEING state, and that triggers BUG_ON(DMF_FREEING) in the 2nd dm_put(). I had tried to solve this problem by changing only multipath not to touch mapped_device which is in DMF_FREEING state, but I couldn't and I came up with a question why we need dm_get() in dm_table_get_md(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 2 -- drivers/md/dm-uevent.c | 7 ++----- drivers/md/dm.c | 14 ++------------ 3 files changed, 4 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4b22feb01a0..7d70cca585a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1231,8 +1231,6 @@ void dm_table_unplug_all(struct dm_table *t) struct mapped_device *dm_table_get_md(struct dm_table *t) { - dm_get(t->md); - return t->md; } diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c index c7c555a8c7b..6b1e3b61b25 100644 --- a/drivers/md/dm-uevent.c +++ b/drivers/md/dm-uevent.c @@ -187,7 +187,7 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { DMERR("%s: Invalid event_type %d", __func__, event_type); - goto out; + return; } event = dm_build_path_uevent(md, ti, @@ -195,12 +195,9 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, _dm_uevent_type_names[event_type].name, path, nr_valid_paths); if (IS_ERR(event)) - goto out; + return; dm_uevent_add(md, &event->elist); - -out: - dm_put(md); } EXPORT_SYMBOL_GPL(dm_path_uevent); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index aa4e2aa86d4..21222f5193f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2699,23 +2699,13 @@ int dm_suspended_md(struct mapped_device *md) int dm_suspended(struct dm_target *ti) { - struct mapped_device *md = dm_table_get_md(ti->table); - int r = dm_suspended_md(md); - - dm_put(md); - - return r; + return dm_suspended_md(dm_table_get_md(ti->table)); } EXPORT_SYMBOL_GPL(dm_suspended); int dm_noflush_suspending(struct dm_target *ti) { - struct mapped_device *md = dm_table_get_md(ti->table); - int r = __noflush_suspending(md); - - dm_put(md); - - return r; + return __noflush_suspending(dm_table_get_md(ti->table)); } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -- cgit v1.2.3 From fce323dd68e13354071538c765b062859e6f8286 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:29:59 +0000 Subject: dm mpath: avoid storing private suspended state 'suspended' flag in struct multipath was introduced to check whether the multipath target is in suspended state, but the same check is done through dm_suspended() now, so remove the flag and related code. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Mike Anderson Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ecada419809..8fa0c955eeb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -95,8 +95,6 @@ struct multipath { mempool_t *mpio_pool; struct mutex work_mutex; - - unsigned suspended; /* Don't create new I/O internally when set. */ }; /* @@ -1278,7 +1276,6 @@ static void multipath_postsuspend(struct dm_target *ti) struct multipath *m = ti->private; mutex_lock(&m->work_mutex); - m->suspended = 1; flush_multipath_work(); mutex_unlock(&m->work_mutex); } @@ -1291,10 +1288,6 @@ static void multipath_resume(struct dm_target *ti) struct multipath *m = (struct multipath *) ti->private; unsigned long flags; - mutex_lock(&m->work_mutex); - m->suspended = 0; - mutex_unlock(&m->work_mutex); - spin_lock_irqsave(&m->lock, flags); m->queue_if_no_path = m->saved_queue_if_no_path; spin_unlock_irqrestore(&m->lock, flags); @@ -1430,11 +1423,6 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) mutex_lock(&m->work_mutex); - if (m->suspended) { - r = -EBUSY; - goto out; - } - if (dm_suspended(ti)) { r = -EBUSY; goto out; -- cgit v1.2.3 From d0259bf0eefc503d3c9c9ccda35033c3dd3aac30 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:30:02 +0000 Subject: dm mpath: hold io until all pg_inits completed m->queue_io is set to block processing I/Os, and it needs to be kept while pg-init, which issues multiple path activations, is in progress. But m->queue is cleared when a path activation completes without error in pg_init_done(), even while other path activations are in progress. That may cause undesired -EIO on paths which are not complete activation. This patch fixes that by not clearing m->queue_io until all path activations complete. (Before the hardware handlers were moved into the SCSI layer, pg_init only used one path.) Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 8fa0c955eeb..ae187e5d7d5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1181,14 +1181,19 @@ static void pg_init_done(void *data, int errors) m->current_pgpath = NULL; m->current_pg = NULL; } - } else if (!m->pg_init_required) { - m->queue_io = 0; + } else if (!m->pg_init_required) pg->bypassed = 0; - } - m->pg_init_in_progress--; - if (!m->pg_init_in_progress) - queue_work(kmultipathd, &m->process_queued_ios); + if (--m->pg_init_in_progress) + /* Activations of other paths are still on going */ + goto out; + + if (!m->pg_init_required) + m->queue_io = 0; + + queue_work(kmultipathd, &m->process_queued_ios); + +out: spin_unlock_irqrestore(&m->lock, flags); } -- cgit v1.2.3 From 2bded7bd7e8b12a913b0b58167a48220560e1514 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:32:13 +0000 Subject: dm mpath: wait for pg_init completion when suspending When suspending the device we must wait for all I/O to complete, but pg-init may be still in progress even after flushing the workqueue for kmpath_handlerd in multipath_postsuspend. This patch waits for pg-init completion correctly in multipath_postsuspend(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ae187e5d7d5..030bc2a053e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -69,6 +69,7 @@ struct multipath { struct list_head priority_groups; unsigned pg_init_required; /* pg_init needs calling? */ unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ + wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ unsigned nr_valid_paths; /* Total number of usable paths */ struct pgpath *current_pgpath; @@ -200,6 +201,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) m->queue_io = 1; INIT_WORK(&m->process_queued_ios, process_queued_ios); INIT_WORK(&m->trigger_event, trigger_event); + init_waitqueue_head(&m->pg_init_wait); mutex_init(&m->work_mutex); m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); if (!m->mpio_pool) { @@ -891,9 +893,34 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, return r; } -static void flush_multipath_work(void) +static void multipath_wait_for_pg_init_completion(struct multipath *m) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long flags; + + add_wait_queue(&m->pg_init_wait, &wait); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + + spin_lock_irqsave(&m->lock, flags); + if (!m->pg_init_in_progress) { + spin_unlock_irqrestore(&m->lock, flags); + break; + } + spin_unlock_irqrestore(&m->lock, flags); + + io_schedule(); + } + set_current_state(TASK_RUNNING); + + remove_wait_queue(&m->pg_init_wait, &wait); +} + +static void flush_multipath_work(struct multipath *m) { flush_workqueue(kmpath_handlerd); + multipath_wait_for_pg_init_completion(m); flush_workqueue(kmultipathd); flush_scheduled_work(); } @@ -902,7 +929,7 @@ static void multipath_dtr(struct dm_target *ti) { struct multipath *m = ti->private; - flush_multipath_work(); + flush_multipath_work(m); free_multipath(m); } @@ -1193,6 +1220,11 @@ static void pg_init_done(void *data, int errors) queue_work(kmultipathd, &m->process_queued_ios); + /* + * Wake up any thread waiting to suspend. + */ + wake_up(&m->pg_init_wait); + out: spin_unlock_irqrestore(&m->lock, flags); } @@ -1281,7 +1313,7 @@ static void multipath_postsuspend(struct dm_target *ti) struct multipath *m = ti->private; mutex_lock(&m->work_mutex); - flush_multipath_work(); + flush_multipath_work(m); mutex_unlock(&m->work_mutex); } -- cgit v1.2.3 From fb61264297ca42a2a132f0433f75ccf7fd304ac6 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:32:18 +0000 Subject: dm mpath: refactor pg_init This patch pulls the pg_init path activation code out of process_queued_ios() into a new function. No functional change. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 030bc2a053e..c1335487cc7 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -235,6 +235,21 @@ static void free_multipath(struct multipath *m) * Path selection *-----------------------------------------------*/ +static void __pg_init_all_paths(struct multipath *m) +{ + struct pgpath *pgpath; + + m->pg_init_count++; + m->pg_init_required = 0; + list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { + /* Skip failed paths */ + if (!pgpath->is_active) + continue; + if (queue_work(kmpath_handlerd, &pgpath->activate_path)) + m->pg_init_in_progress++; + } +} + static void __switch_pg(struct multipath *m, struct pgpath *pgpath) { m->current_pg = pgpath->pg; @@ -439,7 +454,7 @@ static void process_queued_ios(struct work_struct *work) { struct multipath *m = container_of(work, struct multipath, process_queued_ios); - struct pgpath *pgpath = NULL, *tmp; + struct pgpath *pgpath = NULL; unsigned must_queue = 1; unsigned long flags; @@ -457,17 +472,9 @@ static void process_queued_ios(struct work_struct *work) (!pgpath && !m->queue_if_no_path)) must_queue = 0; - if (m->pg_init_required && !m->pg_init_in_progress && pgpath) { - m->pg_init_count++; - m->pg_init_required = 0; - list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) { - /* Skip failed paths */ - if (!tmp->is_active) - continue; - if (queue_work(kmpath_handlerd, &tmp->activate_path)) - m->pg_init_in_progress++; - } - } + if (m->pg_init_required && !m->pg_init_in_progress && pgpath) + __pg_init_all_paths(m); + out: spin_unlock_irqrestore(&m->lock, flags); if (!must_queue) -- cgit v1.2.3 From ede5ea0b8b815560dc54c712536fdf0b456b6ad0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 6 Mar 2010 02:32:22 +0000 Subject: dm raid1: always return error if all legs fail If all mirror legs fail, always return an error instead of holding the bio, even if the handle_errors option was set. At present it is the responsibility of the driver underneath us to deal with retries, multipath etc. The patch adds the bio to the failures list instead of holding it directly. do_failures tests first if all legs failed and, if so, returns the bio with -EIO. If any leg is still alive and handle_errors is set, do_failures calls hold_bio. Reviewed-by: Takahiro Yasui Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid1.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6c1046df81f..de26fde4098 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -737,9 +737,12 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) dm_rh_delay(ms->rh, bio); while ((bio = bio_list_pop(&nosync))) { - if (unlikely(ms->leg_failure) && errors_handled(ms)) - hold_bio(ms, bio); - else { + if (unlikely(ms->leg_failure) && errors_handled(ms)) { + spin_lock_irq(&ms->lock); + bio_list_add(&ms->failures, bio); + spin_unlock_irq(&ms->lock); + wakeup_mirrord(ms); + } else { map_bio(get_default_mirror(ms), bio); generic_make_request(bio); } -- cgit v1.2.3 From 0f3649a9e305ea22eb196a84a2d7520afcaa6060 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Mar 2010 02:32:24 +0000 Subject: dm ioctl: only issue uevent on resume if state changed Only issue a uevent on a resume if the state of the device changed, i.e. if it was suspended and/or its table was replaced. Signed-off-by: Dave Wysochanski Signed-off-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1d669322b27..e3cf5686d0a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -897,16 +897,17 @@ static int do_resume(struct dm_ioctl *param) set_disk_ro(dm_disk(md), 1); } - if (dm_suspended_md(md)) + if (dm_suspended_md(md)) { r = dm_resume(md); + if (!r) + dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + } if (old_map) dm_table_destroy(old_map); - if (!r) { - dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + if (!r) r = __dev_status(md, param); - } dm_put(md); return r; -- cgit v1.2.3 From 8215d6ec5fee1e76545decea2cd73717efb5cb42 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Sat, 6 Mar 2010 02:32:27 +0000 Subject: dm table: remove unused dm_get_device range parameters Remove unused parameters(start and len) of dm_get_device() and fix the callers. Signed-off-by: Nikanth Karthikesan Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 3 +-- drivers/md/dm-delay.c | 8 ++++---- drivers/md/dm-linear.c | 3 +-- drivers/md/dm-log.c | 3 +-- drivers/md/dm-mpath.c | 7 +++---- drivers/md/dm-raid1.c | 3 +-- drivers/md/dm-snap.c | 8 +++----- drivers/md/dm-stripe.c | 3 +-- drivers/md/dm-table.c | 10 ++++------ 9 files changed, 19 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a93637223c8..3bdbb611570 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1160,8 +1160,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - if (dm_get_device(ti, argv[3], cc->start, ti->len, - dm_table_get_mode(ti->table), &cc->dev)) { + if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { ti->error = "Device lookup failed"; goto bad_device; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index ebe7381f47c..852052880d7 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -156,8 +156,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (dm_get_device(ti, argv[0], dc->start_read, ti->len, - dm_table_get_mode(ti->table), &dc->dev_read)) { + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), + &dc->dev_read)) { ti->error = "Device lookup failed"; goto bad; } @@ -177,8 +177,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_dev_read; } - if (dm_get_device(ti, argv[3], dc->start_write, ti->len, - dm_table_get_mode(ti->table), &dc->dev_write)) { + if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), + &dc->dev_write)) { ti->error = "Write device lookup failed"; goto bad_dev_read; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 82f7d6e6b1e..9200dbf2391 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -47,8 +47,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) } lc->start = tmp; - if (dm_get_device(ti, argv[0], lc->start, ti->len, - dm_table_get_mode(ti->table), &lc->dev)) { + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) { ti->error = "dm-linear: Device lookup failed"; goto bad; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 7035582786f..5a08be0222d 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -543,8 +543,7 @@ static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti, return -EINVAL; } - r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */, - FMODE_READ | FMODE_WRITE, &dev); + r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &dev); if (r) return r; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c1335487cc7..826bce7343b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -607,8 +607,8 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, shift(as), ti->begin, ti->len, - dm_table_get_mode(ti->table), &p->path.dev); + r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), + &p->path.dev); if (r) { ti->error = "error getting device"; goto bad; @@ -1505,8 +1505,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - r = dm_get_device(ti, argv[1], ti->begin, ti->len, - dm_table_get_mode(ti->table), &dev); + r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); if (r) { DMWARN("message: error getting device %s", argv[1]); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index de26fde4098..6d66ddf3907 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -920,8 +920,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, return -EINVAL; } - if (dm_get_device(ti, argv[0], offset, ti->len, - dm_table_get_mode(ti->table), + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ms->mirror[mirror].dev)) { ti->error = "Device lookup failure"; return -ENXIO; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ee8eb283650..0789c22ff0d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1081,8 +1081,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv++; argc--; - r = dm_get_device(ti, cow_path, 0, 0, - FMODE_READ | FMODE_WRITE, &s->cow); + r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow); if (r) { ti->error = "Cannot get COW device"; goto bad_cow; @@ -1098,7 +1097,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv += args_used; argc -= args_used; - r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin); + r = dm_get_device(ti, origin_path, origin_mode, &s->origin); if (r) { ti->error = "Cannot get origin device"; goto bad_origin; @@ -2100,8 +2099,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - r = dm_get_device(ti, argv[0], 0, ti->len, - dm_table_get_mode(ti->table), &dev); + r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); if (r) { ti->error = "Cannot get target device"; return r; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index bd58703ee8f..e610725db76 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -80,8 +80,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, if (sscanf(argv[1], "%llu", &start) != 1) return -EINVAL; - if (dm_get_device(ti, argv[0], start, sc->stripe_width, - dm_table_get_mode(ti->table), + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &sc->stripe[stripe].dev)) return -ENXIO; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7d70cca585a..9924ea23032 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -429,8 +429,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * it's already present. */ static int __table_get_device(struct dm_table *t, struct dm_target *ti, - const char *path, sector_t start, sector_t len, - fmode_t mode, struct dm_dev **result) + const char *path, fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); @@ -527,11 +526,10 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } EXPORT_SYMBOL_GPL(dm_set_device_limits); -int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, fmode_t mode, struct dm_dev **result) +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) { - return __table_get_device(ti->table, ti, path, - start, len, mode, result); + return __table_get_device(ti->table, ti, path, mode, result); } -- cgit v1.2.3 From a97f925a32aad2a37971d7bfb657006acf04e42d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 6 Mar 2010 02:32:29 +0000 Subject: dm: free dm_io before bio_endio not after Free the dm_io structure before calling bio_endio() instead of after it, to ensure that the io_pool containing it is not referenced after it is freed. This partially fixes a problem described here https://www.redhat.com/archives/dm-devel/2010-February/msg00109.html thread 1: bio_endio(bio, io_error); /* scheduling happens */ thread 2: close the device remove the device thread 1: free_io(md, io); Thread 2, when removing the device, sees non-empty md->io_pool (because the io hasn't been freed by thread 1 yet) and may crash with BUG in mempool_free. Thread 1 may also crash, when freeing into a nonexisting mempool. To fix this we must make sure that bio_endio() is the last call and the md structure is not accessed afterwards. There is another bio_endio in process_barrier, but it is called from the thread and the thread is destroyed prior to freeing the mempools, so this call is not affected by the bug. A similar bug exists with module unloads - the module may be unloaded immediately after bio_endio - but that is more difficult to fix. Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 21222f5193f..7199846364e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -635,8 +635,10 @@ static void dec_pending(struct dm_io *io, int error) if (!md->barrier_error && io_error != -EOPNOTSUPP) md->barrier_error = io_error; end_io_acct(io); + free_io(md, io); } else { end_io_acct(io); + free_io(md, io); if (io_error != DM_ENDIO_REQUEUE) { trace_block_bio_complete(md->queue, bio); @@ -644,8 +646,6 @@ static void dec_pending(struct dm_io *io, int error) bio_endio(bio, io_error); } } - - free_io(md, io); } } -- cgit v1.2.3 From 3abf85b5b5851b5f28d3d8a920ebb844edd08352 Mon Sep 17 00:00:00 2001 From: Peter Rajnoha Date: Sat, 6 Mar 2010 02:32:31 +0000 Subject: dm ioctl: introduce flag indicating uevent was generated Set a new DM_UEVENT_GENERATED_FLAG when returning from ioctls to indicate that a uevent was actually generated. This tells the userspace caller that it may need to wait for the event to be processed. Signed-off-by: Peter Rajnoha Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 19 ++++++++++++------- drivers/md/dm.c | 7 ++++--- drivers/md/dm.h | 4 ++-- 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e3cf5686d0a..d7500e1c26f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -285,7 +285,8 @@ retry: up_write(&_hash_lock); } -static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) +static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old, + const char *new) { char *new_name, *old_name; struct hash_cell *hc; @@ -344,7 +345,8 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) dm_table_put(table); } - dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie); + if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie)) + *flags |= DM_UEVENT_GENERATED_FLAG; dm_put(hc->md); up_write(&_hash_lock); @@ -736,10 +738,10 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) __hash_remove(hc); up_write(&_hash_lock); - dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr); + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; dm_put(md); - param->data_size = 0; return 0; } @@ -773,7 +775,9 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return r; param->data_size = 0; - return dm_hash_rename(param->event_nr, param->name, new_name); + + return dm_hash_rename(param->event_nr, ¶m->flags, param->name, + new_name); } static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) @@ -899,8 +903,8 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended_md(md)) { r = dm_resume(md); - if (!r) - dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; } if (old_map) @@ -1477,6 +1481,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) { /* Always clear this flag */ param->flags &= ~DM_BUFFER_FULL_FLAG; + param->flags &= ~DM_UEVENT_GENERATED_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7199846364e..d21e1284604 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2618,18 +2618,19 @@ out: /*----------------------------------------------------------------- * Event notification. *---------------------------------------------------------------*/ -void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, +int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, unsigned cookie) { char udev_cookie[DM_COOKIE_LENGTH]; char *envp[] = { udev_cookie, NULL }; if (!cookie) - kobject_uevent(&disk_to_dev(md->disk)->kobj, action); + return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); else { snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", DM_COOKIE_ENV_VAR_NAME, cookie); - kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); + return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, + action, envp); } } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8dadaa5bc39..bad1724d486 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -125,8 +125,8 @@ void dm_stripe_exit(void); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); -void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, - unsigned cookie); +int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, + unsigned cookie); int dm_io_init(void); void dm_io_exit(void); -- cgit v1.2.3 From 924e600d417ead9ef67043988055ba236f114718 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Mar 2010 02:32:33 +0000 Subject: dm: eliminate some holes data structures Eliminate a 4-byte hole in 'struct dm_io_memory' by moving 'offset' above the 'ptr' to which it applies (size reduced from 24 to 16 bytes). And by association, 1-4 byte hole is eliminated in 'struct dm_io_request' (size reduced from 56 to 48 bytes). Eliminate all 6 4-byte holes and 1 cache-line in 'struct dm_snapshot' (size reduced from 392 to 368 bytes). Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0789c22ff0d..54853773510 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -83,10 +83,10 @@ struct dm_snapshot { /* Whether or not owning mapped_device is suspended */ int suspended; - mempool_t *pending_pool; - atomic_t pending_exceptions_count; + mempool_t *pending_pool; + struct dm_exception_table pending; struct dm_exception_table complete; @@ -96,6 +96,11 @@ struct dm_snapshot { */ spinlock_t pe_lock; + /* Chunks with outstanding reads */ + spinlock_t tracked_chunk_lock; + mempool_t *tracked_chunk_pool; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + /* The on disk metadata handler */ struct dm_exception_store *store; @@ -105,10 +110,12 @@ struct dm_snapshot { struct bio_list queued_bios; struct work_struct queued_bios_work; - /* Chunks with outstanding reads */ - mempool_t *tracked_chunk_pool; - spinlock_t tracked_chunk_lock; - struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + /* Wait for events based on state_bits */ + unsigned long state_bits; + + /* Range of chunks currently being merged. */ + chunk_t first_merging_chunk; + int num_merging_chunks; /* * The merge operation failed if this flag is set. @@ -125,13 +132,6 @@ struct dm_snapshot { */ int merge_failed; - /* Wait for events based on state_bits */ - unsigned long state_bits; - - /* Range of chunks currently being merged. */ - chunk_t first_merging_chunk; - int num_merging_chunks; - /* * Incoming bios that overlap with chunks being merged must wait * for them to be committed. -- cgit v1.2.3 From f070304094edb8d516423e79edd27c97ec2020b0 Mon Sep 17 00:00:00 2001 From: Takahiro Yasui Date: Sat, 6 Mar 2010 02:32:35 +0000 Subject: dm raid1: fix deadlock when suspending failed device To prevent deadlock, bios in the hold list should be flushed before dm_rh_stop_recovery() is called in mirror_suspend(). The recovery can't start because there are pending bios and therefore dm_rh_stop_recovery deadlocks. When there are pending bios in the hold list, the recovery waits for the completion of the bios after recovery_count is acquired. The recovery_count is released when the recovery finished, however, the bios in the hold list are processed after dm_rh_stop_recovery() in mirror_presuspend(). dm_rh_stop_recovery() also acquires recovery_count, then deadlock occurs. Signed-off-by: Takahiro Yasui Signed-off-by: Alasdair G Kergon Reviewed-by: Mikulas Patocka --- drivers/md/dm-raid1.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6d66ddf3907..ddda531723d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -465,9 +465,17 @@ static void map_region(struct dm_io_region *io, struct mirror *m, static void hold_bio(struct mirror_set *ms, struct bio *bio) { /* - * If device is suspended, complete the bio. + * Lock is required to avoid race condition during suspend + * process. */ + spin_lock_irq(&ms->lock); + if (atomic_read(&ms->suspend)) { + spin_unlock_irq(&ms->lock); + + /* + * If device is suspended, complete the bio. + */ if (dm_noflush_suspending(ms->ti)) bio_endio(bio, DM_ENDIO_REQUEUE); else @@ -478,7 +486,6 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio) /* * Hold bio until the suspend is complete. */ - spin_lock_irq(&ms->lock); bio_list_add(&ms->holds, bio); spin_unlock_irq(&ms->lock); } @@ -1260,6 +1267,20 @@ static void mirror_presuspend(struct dm_target *ti) atomic_set(&ms->suspend, 1); + /* + * Process bios in the hold list to start recovery waiting + * for bios in the hold list. After the process, no bio has + * a chance to be added in the hold list because ms->suspend + * is set. + */ + spin_lock_irq(&ms->lock); + holds = ms->holds; + bio_list_init(&ms->holds); + spin_unlock_irq(&ms->lock); + + while ((bio = bio_list_pop(&holds))) + hold_bio(ms, bio); + /* * We must finish up all the work that we've * generated (i.e. recovery work). @@ -1280,22 +1301,6 @@ static void mirror_presuspend(struct dm_target *ti) * we know that all of our I/O has been pushed. */ flush_workqueue(ms->kmirrord_wq); - - /* - * Now set ms->suspend is set and the workqueue flushed, no more - * entries can be added to ms->hold list, so process it. - * - * Bios can still arrive concurrently with or after this - * presuspend function, but they cannot join the hold list - * because ms->suspend is set. - */ - spin_lock_irq(&ms->lock); - holds = ms->holds; - bio_list_init(&ms->holds); - spin_unlock_irq(&ms->lock); - - while ((bio = bio_list_pop(&holds))) - hold_bio(ms, bio); } static void mirror_postsuspend(struct dm_target *ti) -- cgit v1.2.3