diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 200 |
1 files changed, 142 insertions, 58 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 788ba96a625..424f7b048c3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -6,7 +6,6 @@ */ #include "dm.h" -#include "dm-bio-list.h" #include "dm-uevent.h" #include <linux/init.h> @@ -89,12 +88,13 @@ union map_info *dm_get_mapinfo(struct bio *bio) /* * Bits for the md->flags field. */ -#define DMF_BLOCK_IO 0 +#define DMF_BLOCK_IO_FOR_SUSPEND 0 #define DMF_SUSPENDED 1 #define DMF_FROZEN 2 #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_QUEUE_IO_TO_THREAD 6 /* * Work processed by per-device workqueue. @@ -124,6 +124,11 @@ struct mapped_device { spinlock_t deferred_lock; /* + * An error from the barrier request currently being processed. + */ + int barrier_error; + + /* * Processing queue (flush/barriers) */ struct workqueue_struct *wq; @@ -424,6 +429,10 @@ static void end_io_acct(struct dm_io *io) part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); part_stat_unlock(); + /* + * After this is decremented the bio must not be touched if it is + * a barrier. + */ dm_disk(md)->part0.in_flight = pending = atomic_dec_return(&md->pending); @@ -435,21 +444,18 @@ static void end_io_acct(struct dm_io *io) /* * Add the bio to the list of deferred io. */ -static int queue_io(struct mapped_device *md, struct bio *bio) +static void queue_io(struct mapped_device *md, struct bio *bio) { down_write(&md->io_lock); - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { - up_write(&md->io_lock); - return 1; - } - spin_lock_irq(&md->deferred_lock); bio_list_add(&md->deferred, bio); spin_unlock_irq(&md->deferred_lock); + if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) + queue_work(md->wq, &md->work); + up_write(&md->io_lock); - return 0; /* deferred successfully */ } /* @@ -533,25 +539,35 @@ static void dec_pending(struct dm_io *io, int error) */ spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) - bio_list_add(&md->deferred, io->bio); + bio_list_add_head(&md->deferred, io->bio); else /* noflush suspend was interrupted. */ io->error = -EIO; spin_unlock_irqrestore(&md->deferred_lock, flags); } - end_io_acct(io); - io_error = io->error; bio = io->bio; - free_io(md, io); + if (bio_barrier(bio)) { + /* + * There can be just one barrier request so we use + * a per-device variable for error reporting. + * Note that you can't touch the bio after end_io_acct + */ + md->barrier_error = io_error; + end_io_acct(io); + } else { + end_io_acct(io); - if (io_error != DM_ENDIO_REQUEUE) { - trace_block_bio_complete(md->queue, bio); + if (io_error != DM_ENDIO_REQUEUE) { + trace_block_bio_complete(md->queue, bio); - bio_endio(bio, io_error); + bio_endio(bio, io_error); + } } + + free_io(md, io); } } @@ -693,13 +709,19 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_sector = sector; clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw; + clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER); clone->bi_vcnt = 1; clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, offset), len); + } + return clone; } @@ -714,6 +736,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); + clone->bi_rw &= ~(1 << BIO_RW_BARRIER); clone->bi_destructor = dm_bio_destructor; clone->bi_sector = sector; clone->bi_idx = idx; @@ -721,6 +744,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone->bi_size = to_bytes(len); clone->bi_flags &= ~(1 << BIO_SEG_VALID); + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, 0), len); + } + return clone; } @@ -834,14 +865,13 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.map = dm_get_table(md); if (unlikely(!ci.map)) { - bio_io_error(bio); - return; - } - if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { - dm_table_put(ci.map); - bio_endio(bio, -EOPNOTSUPP); + if (!bio_barrier(bio)) + bio_io_error(bio); + else + md->barrier_error = -EIO; return; } + ci.md = md; ci.bio = bio; ci.io = alloc_io(md); @@ -918,7 +948,6 @@ out: */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; int cpu; @@ -931,34 +960,27 @@ static int dm_request(struct request_queue *q, struct bio *bio) part_stat_unlock(); /* - * If we're suspended we have to queue - * this io for later. + * If we're suspended or the thread is processing barriers + * we have to queue this io for later. */ - while (test_bit(DMF_BLOCK_IO, &md->flags)) { + if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || + unlikely(bio_barrier(bio))) { up_read(&md->io_lock); - if (bio_rw(bio) != READA) - r = queue_io(md, bio); + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && + bio_rw(bio) == READA) { + bio_io_error(bio); + return 0; + } - if (r <= 0) - goto out_req; + queue_io(md, bio); - /* - * We're in a while loop, because someone could suspend - * before we get to the following read lock. - */ - down_read(&md->io_lock); + return 0; } __split_and_process_bio(md, bio); up_read(&md->io_lock); return 0; - -out_req: - if (r < 0) - bio_io_error(bio); - - return 0; } static void dm_unplug_all(struct request_queue *q) @@ -978,7 +1000,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) struct mapped_device *md = congested_data; struct dm_table *map; - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { + if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { map = dm_get_table(md); if (map) { r = dm_table_any_congested(map, bdi_bits); @@ -1193,6 +1215,7 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->tio_pool); mempool_destroy(md->io_pool); bioset_free(md->bs); + blk_integrity_unregister(md->disk); del_gendisk(md->disk); free_minor(minor); @@ -1406,6 +1429,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) return r; } +static int dm_flush(struct mapped_device *md) +{ + dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); + return 0; +} + +static void process_barrier(struct mapped_device *md, struct bio *bio) +{ + int error = dm_flush(md); + + if (unlikely(error)) { + bio_endio(bio, error); + return; + } + if (bio_empty_barrier(bio)) { + bio_endio(bio, 0); + return; + } + + __split_and_process_bio(md, bio); + + error = dm_flush(md); + + if (!error && md->barrier_error) + error = md->barrier_error; + + if (md->barrier_error != DM_ENDIO_REQUEUE) + bio_endio(bio, error); +} + /* * Process the deferred bios */ @@ -1417,25 +1470,34 @@ static void dm_wq_work(struct work_struct *work) down_write(&md->io_lock); -next_bio: - spin_lock_irq(&md->deferred_lock); - c = bio_list_pop(&md->deferred); - spin_unlock_irq(&md->deferred_lock); + while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { + spin_lock_irq(&md->deferred_lock); + c = bio_list_pop(&md->deferred); + spin_unlock_irq(&md->deferred_lock); - if (c) { - __split_and_process_bio(md, c); - goto next_bio; - } + if (!c) { + clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); + break; + } - clear_bit(DMF_BLOCK_IO, &md->flags); + up_write(&md->io_lock); + + if (bio_barrier(c)) + process_barrier(md, c); + else + __split_and_process_bio(md, c); + + down_write(&md->io_lock); + } up_write(&md->io_lock); } static void dm_queue_flush(struct mapped_device *md) { + clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + smp_mb__after_clear_bit(); queue_work(md->wq, &md->work); - flush_workqueue(md->wq); } /* @@ -1553,20 +1615,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) } /* - * First we set the BLOCK_IO flag so no more ios will be mapped. + * Here we must make sure that no processes are submitting requests + * to target drivers i.e. no one may be executing + * __split_and_process_bio. This is called from dm_request and + * dm_wq_work. + * + * To get all processes out of __split_and_process_bio in dm_request, + * we take the write lock. To prevent any process from reentering + * __split_and_process_bio from dm_request, we set + * DMF_QUEUE_IO_TO_THREAD. + * + * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND + * and call flush_workqueue(md->wq). flush_workqueue will wait until + * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any + * further calls to __split_and_process_bio from dm_wq_work. */ down_write(&md->io_lock); - set_bit(DMF_BLOCK_IO, &md->flags); - + set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); up_write(&md->io_lock); + flush_workqueue(md->wq); + /* - * Wait for the already-mapped ios to complete. + * At this point no more requests are entering target request routines. + * We call dm_wait_for_completion to wait for all existing requests + * to finish. */ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); down_write(&md->io_lock); - if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); up_write(&md->io_lock); @@ -1579,6 +1657,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) goto out; /* pushback list is already flushed, so skip flush */ } + /* + * If dm_wait_for_completion returned 0, the device is completely + * quiescent now. There is no request-processing activity. All new + * requests are being added to md->deferred list. + */ + dm_table_postsuspend_targets(map); set_bit(DMF_SUSPENDED, &md->flags); |