From eae9acd13a8d14b50c00a961fa959606f34bbd92 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:08:25 +0100 Subject: Support 'discard sectors' operation in translation layer support core Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 9ff007c4962..681d5aca2af 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,6 +32,14 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; +static int blktrans_discard_request(struct request_queue *q, + struct request *req) +{ + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_DISCARD; + return 0; +} + static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -44,6 +52,10 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, buf = req->buffer; + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_DISCARD) + return !tr->discard(dev, block, nsect); + if (!blk_fs_request(req)) return 0; @@ -367,6 +379,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkcore_priv->rq->queuedata = tr; blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); + if (tr->discard) + blk_queue_set_discard(tr->blkcore_priv->rq, + blktrans_discard_request); + tr->blkshift = ffs(tr->blksize) - 1; tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr, -- cgit v1.2.3 From fdc53971bce56d299cb5f1f06ecbff30b34cbaf2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:08:56 +0100 Subject: Support 'discard sectors' operation. We can benefit from knowing that the file system no longer cares about the contents of certain sectors, by throwing them away immediately and then never having to garbage collect them, and using the extra free space to make our operations more efficient. Do so. 
Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/mtd/ftl.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index f34f20c7891..9bf581c4f74 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1005,6 +1005,29 @@ static int ftl_writesect(struct mtd_blktrans_dev *dev, return ftl_write((void *)dev, buf, block, 1); } +static int ftl_discardsect(struct mtd_blktrans_dev *dev, + unsigned long sector, unsigned nr_sects) +{ + partition_t *part = (void *)dev; + uint32_t bsize = 1 << part->header.EraseUnitSize; + + DEBUG(1, "FTL erase sector %ld for %d sectors\n", + sector, nr_sects); + + while (nr_sects) { + uint32_t old_addr = part->VirtualBlockMap[sector]; + if (old_addr != 0xffffffff) { + part->VirtualBlockMap[sector] = 0xffffffff; + part->EUNInfo[old_addr/bsize].Deleted++; + if (set_bam_entry(part, old_addr, 0)) + return -EIO; + } + nr_sects--; + sector++; + } + + return 0; +} /*====================================================================*/ static void ftl_freepart(partition_t *part) @@ -1069,6 +1092,7 @@ static struct mtd_blktrans_ops ftl_tr = { .blksize = SECTOR_SIZE, .readsect = ftl_readsect, .writesect = ftl_writesect, + .discard = ftl_discardsect, .getgeo = ftl_getgeo, .add_mtd = ftl_add_mtd, .remove_dev = ftl_remove_dev, -- cgit v1.2.3 From 1a8e2bddd5c29008f311613e75925fecbf522c5b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Aug 2008 12:35:09 +0100 Subject: Kill REQ_TYPE_FLUSH It was only used by ps3disk, and it should probably have been REQ_TYPE_LINUX_BLOCK + REQ_LB_OP_FLUSH. 
Signed-off-by: David Woodhouse Signed-off-by: Jens Axboe --- drivers/block/ps3disk.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index d797e209951..4b0d6c7f4c6 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, if (blk_fs_request(req)) { if (ps3disk_submit_request_sg(dev, req)) break; - } else if (req->cmd_type == REQ_TYPE_FLUSH) { + } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { if (ps3disk_submit_flush_request(dev, req)) break; } else { @@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) return IRQ_HANDLED; } - if (req->cmd_type == REQ_TYPE_FLUSH) { + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { read = 0; num_sectors = req->hard_cur_sectors; op = "flush"; @@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - req->cmd_type = REQ_TYPE_FLUSH; + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_FLUSH; } static unsigned long ps3disk_mask; -- cgit v1.2.3 From 766ca4428d1239a970926856c447310c9c191af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Thu, 14 Aug 2008 09:59:13 +0200 Subject: virtio_blk: use a wrapper function to access io context information of IO requests struct request has an ioprio member but it is never updated because currently bios do not hold io context information. The implication of this is that virtio_blk ends up passing useless information to the backend driver. That said, some IO schedulers such as CFQ do store io context information in struct request, but use private members for that, which means that that information cannot be directly accessed in an IO scheduler-independent way.
This patch adds a function to obtain the ioprio of a request. We should avoid accessing ioprio directly and use this function instead, so that its users do not have to care about future changes in block layer structures or what the currently active IO controller is. This patch does not introduce any functional changes but paves the way for future clean-ups and enhancements. Signed-off-by: Fernando Luis Vazquez Cao Acked-by: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 42251095134..879506a2c23 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -84,11 +84,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_fs_request(vbr->req)) { vbr->out_hdr.type = 0; vbr->out_hdr.sector = vbr->req->sector; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else if (blk_pc_request(vbr->req)) { vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else { /* We don't put anything else in the queue. */ BUG(); -- cgit v1.2.3 From 5df97b91b5d7ed426034fcc84cb6e7cf682b8838 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 15 Aug 2008 10:20:02 +0200 Subject: drop vmerge accounting Remove hw_segments field from struct bio and struct request. Without virtual merge accounting they have no purpose. 
Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 3 --- drivers/md/raid10.c | 3 --- 2 files changed, 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03a5ab705c2..28a3869dcfd 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1302,9 +1302,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) sbio->bi_size = r1_bio->sectors << 9; sbio->bi_idx = 0; sbio->bi_phys_segments = 0; - sbio->bi_hw_segments = 0; - sbio->bi_hw_front_size = 0; - sbio->bi_hw_back_size = 0; sbio->bi_flags &= ~(BIO_POOL_MASK - 1); sbio->bi_flags |= 1 << BIO_UPTODATE; sbio->bi_next = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e34cd0e6247..0f40688503e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1345,9 +1345,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) tbio->bi_size = r10_bio->sectors << 9; tbio->bi_idx = 0; tbio->bi_phys_segments = 0; - tbio->bi_hw_segments = 0; - tbio->bi_hw_front_size = 0; - tbio->bi_hw_back_size = 0; tbio->bi_flags &= ~(BIO_POOL_MASK - 1); tbio->bi_flags |= 1 << BIO_UPTODATE; tbio->bi_next = NULL; -- cgit v1.2.3 From 960e739d9e9f1c2346d8bdc65299ee2e1ed42218 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:41:18 +0200 Subject: block: raid fixups for removal of bi_hw_segments Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 1 - drivers/md/raid10.c | 1 - drivers/md/raid5.c | 66 +++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 51 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 28a3869dcfd..0b82030c265 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1787,7 +1787,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; bio->bi_end_io = NULL; bio->bi_private = 
NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0f40688503e..d3b9aa09628 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1944,7 +1944,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 224de022e7c..05b22925cce 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -101,6 +101,40 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif +/* + * We maintain a biased count of active stripes in the bottom 8 bits of + * bi_phys_segments, and a count of processed stripes in the upper 8 bits + */ +static inline int raid5_bi_phys_segments(struct bio *bio) +{ + return bio->bi_phys_segments & 0xff; +} + +static inline int raid5_bi_hw_segments(struct bio *bio) +{ + return (bio->bi_phys_segments >> 8) & 0xff; +} + +static inline int raid5_dec_bi_phys_segments(struct bio *bio) +{ + --bio->bi_phys_segments; + return raid5_bi_phys_segments(bio); +} + +static inline int raid5_dec_bi_hw_segments(struct bio *bio) +{ + unsigned short val = raid5_bi_hw_segments(bio); + + --val; + bio->bi_phys_segments = (val << 8) | raid5_bi_phys_segments(bio); + return val; +} + +static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) +{ + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 8); +} + static inline int raid6_next_disk(int disk, int raid_disks) { disk++; @@ -507,7 +541,7 @@ static void ops_complete_biofill(void *stripe_head_ref) while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { rbi2 = r5_next_bio(rbi, dev->sector); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in if (*bip) bi->bi_next = *bip; *bip = 
bi; - bi->bi_phys_segments ++; + bi->bi_phys_segments++; spin_unlock_irq(&conf->device_lock); spin_unlock(&sh->lock); @@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { bi->bi_next = *return_bi; *return_bi = bi; } @@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); - if (--wbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(wbi)) { md_write_end(conf->mddev); wbi->bi_next = *return_bi; *return_bi = wbi; @@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) copy_data(0, rbi, dev->page, dev->sector); rbi2 = r5_next_bio(rbi, dev->sector); spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf) if(bi) { conf->retry_read_aligned_list = bi->bi_next; bi->bi_next = NULL; + /* + * 
this sets the active strip count to 1 and the processed + * strip count to zero (upper 8 bits) + */ bi->bi_phys_segments = 1; /* biased count of active stripes */ - bi->bi_hw_segments = 0; /* count of processed stripes */ } return bi; @@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi) if ((bi->bi_size>>9) > q->max_sectors) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments || - bi->bi_hw_segments > q->max_hw_segments) + if (bi->bi_phys_segments > q->max_phys_segments) return 0; if (q->merge_bvec_fn) @@ -3468,7 +3504,7 @@ static int make_request(struct request_queue *q, struct bio * bi) } spin_lock_irq(&conf->device_lock); - remaining = --bi->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(bi); spin_unlock_irq(&conf->device_lock); if (remaining == 0) { @@ -3752,7 +3788,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) sector += STRIPE_SECTORS, scnt++) { - if (scnt < raid_bio->bi_hw_segments) + if (scnt < raid5_bi_hw_segments(raid_bio)) /* already done this stripe */ continue; @@ -3760,7 +3796,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) if (!sh) { /* failed to get a stripe - must wait */ - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3768,7 +3804,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) set_bit(R5_ReadError, &sh->dev[dd_idx].flags); if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { release_stripe(sh); - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3778,7 +3814,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) handled++; } spin_lock_irq(&conf->device_lock); - remaining = --raid_bio->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(raid_bio); spin_unlock_irq(&conf->device_lock); if (remaining == 0) 
bio_endio(raid_bio, 0); -- cgit v1.2.3 From 5b99c2ffa980528a197f26c7d876cceeccce8dd5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Aug 2008 10:56:11 +0200 Subject: block: make bi_phys_segments an unsigned int instead of short raid5 can overflow with more than 255 stripes, and we can increase it to an int for free on both 32 and 64-bit archs due to the padding. Signed-off-by: Jens Axboe --- drivers/md/raid5.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 05b22925cce..37e546528f9 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -102,17 +102,17 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif /* - * We maintain a biased count of active stripes in the bottom 8 bits of - * bi_phys_segments, and a count of processed stripes in the upper 8 bits + * We maintain a biased count of active stripes in the bottom 16 bits of + * bi_phys_segments, and a count of processed stripes in the upper 16 bits */ static inline int raid5_bi_phys_segments(struct bio *bio) { - return bio->bi_phys_segments & 0xff; + return bio->bi_phys_segments & 0xffff; } static inline int raid5_bi_hw_segments(struct bio *bio) { - return (bio->bi_phys_segments >> 8) & 0xff; + return (bio->bi_phys_segments >> 16) & 0xffff; } static inline int raid5_dec_bi_phys_segments(struct bio *bio) @@ -126,13 +126,13 @@ static inline int raid5_dec_bi_hw_segments(struct bio *bio) unsigned short val = raid5_bi_hw_segments(bio); --val; - bio->bi_phys_segments = (val << 8) | raid5_bi_phys_segments(bio); + bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); return val; } static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) { - bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 8); + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); } static inline int raid6_next_disk(int disk, int raid_disks) -- cgit 
v1.2.3 From 5a3ceb861663040f9ef0176df4aaa494bba5e352 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:50:19 +0200 Subject: driver-core: use klist for class device list and implement iterator Iterating over entries using callback usually isn't too fun especially when the entry being iterated over can't be manipulated freely. This patch converts class->p->class_devices to klist and implements class device iterator so that the users can freely build their own control structure. The users are also free to call back into class code without worrying about locking. class_for_each_device() and class_find_device() are converted to use the new iterators, so their users don't have to worry about locking anymore either. Note: This depends on klist-dont-iterate-over-deleted-entries patch because class_intf->add/remove_dev() depends on proper synchronization with device removal. Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/base/base.h | 2 +- drivers/base/class.c | 136 ++++++++++++++++++++++++++++++++++++++------------- drivers/base/core.c | 6 +-- 3 files changed, 107 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/base/base.h b/drivers/base/base.h index 31dc0cd84af..0a5f055dffb 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -54,7 +54,7 @@ struct driver_private { */ struct class_private { struct kset class_subsys; - struct list_head class_devices; + struct klist class_devices; struct list_head class_interfaces; struct kset class_dirs; struct mutex class_mutex; diff --git a/drivers/base/class.c b/drivers/base/class.c index cc5e28c8885..eb85e431230 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -135,6 +135,20 @@ static void remove_class_attrs(struct class *cls) } } +static void klist_class_dev_get(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_class); + + get_device(dev); +} + +static void 
klist_class_dev_put(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_class); + + put_device(dev); +} + int __class_register(struct class *cls, struct lock_class_key *key) { struct class_private *cp; @@ -145,7 +159,7 @@ int __class_register(struct class *cls, struct lock_class_key *key) cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; - INIT_LIST_HEAD(&cp->class_devices); + klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->class_interfaces); kset_init(&cp->class_dirs); __mutex_init(&cp->class_mutex, "struct class mutex", key); @@ -268,6 +282,71 @@ char *make_class_name(const char *name, struct kobject *kobj) } #endif +/** + * class_dev_iter_init - initialize class device iterator + * @iter: class iterator to initialize + * @class: the class we wanna iterate over + * @start: the device to start iterating from, if any + * @type: device_type of the devices to iterate over, NULL for all + * + * Initialize class iterator @iter such that it iterates over devices + * of @class. If @start is set, the list iteration will start there, + * otherwise if it is NULL, the iteration starts at the beginning of + * the list. + */ +void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, + struct device *start, const struct device_type *type) +{ + struct klist_node *start_knode = NULL; + + if (start) + start_knode = &start->knode_class; + klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode); + iter->type = type; +} +EXPORT_SYMBOL_GPL(class_dev_iter_init); + +/** + * class_dev_iter_next - iterate to the next device + * @iter: class iterator to proceed + * + * Proceed @iter to the next device and return it. Returns NULL if + * iteration is complete. + * + * The returned device is referenced and won't be released till + * iterator is proceed to the next device or exited. 
The caller is + * free to do whatever it wants to do with the device including + * calling back into class code. + */ +struct device *class_dev_iter_next(struct class_dev_iter *iter) +{ + struct klist_node *knode; + struct device *dev; + + while (1) { + knode = klist_next(&iter->ki); + if (!knode) + return NULL; + dev = container_of(knode, struct device, knode_class); + if (!iter->type || iter->type == dev->type) + return dev; + } +} +EXPORT_SYMBOL_GPL(class_dev_iter_next); + +/** + * class_dev_iter_exit - finish iteration + * @iter: class iterator to finish + * + * Finish an iteration. Always call this function after iteration is + * complete whether the iteration ran till the end or not. + */ +void class_dev_iter_exit(struct class_dev_iter *iter) +{ + klist_iter_exit(&iter->ki); +} +EXPORT_SYMBOL_GPL(class_dev_iter_exit); + /** * class_for_each_device - device iterator * @class: the class we're iterating @@ -283,13 +362,13 @@ char *make_class_name(const char *name, struct kobject *kobj) * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * - * Note, we hold class->class_mutex in this function, so it can not be - * re-acquired in @fn, otherwise it will self-deadlocking. For - * example, calls to add or remove class members would be verboten. + * @fn is allowed to do anything including calling back into class + * code. There's no locking restriction. 
*/ int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *, void *)) { + struct class_dev_iter iter; struct device *dev; int error = 0; @@ -301,20 +380,13 @@ int class_for_each_device(struct class *class, struct device *start, return -EINVAL; } - mutex_lock(&class->p->class_mutex); - list_for_each_entry(dev, &class->p->class_devices, node) { - if (start) { - if (start == dev) - start = NULL; - continue; - } - dev = get_device(dev); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { error = fn(dev, data); - put_device(dev); if (error) break; } - mutex_unlock(&class->p->class_mutex); + class_dev_iter_exit(&iter); return error; } @@ -337,16 +409,15 @@ EXPORT_SYMBOL_GPL(class_for_each_device); * * Note, you will need to drop the reference with put_device() after use. * - * We hold class->class_mutex in this function, so it can not be - * re-acquired in @match, otherwise it will self-deadlocking. For - * example, calls to add or remove class members would be verboten. + * @fn is allowed to do anything including calling back into class + * code. There's no locking restriction. */ struct device *class_find_device(struct class *class, struct device *start, void *data, int (*match)(struct device *, void *)) { + struct class_dev_iter iter; struct device *dev; - int found = 0; if (!class) return NULL; @@ -356,29 +427,23 @@ struct device *class_find_device(struct class *class, struct device *start, return NULL; } - mutex_lock(&class->p->class_mutex); - list_for_each_entry(dev, &class->p->class_devices, node) { - if (start) { - if (start == dev) - start = NULL; - continue; - } - dev = get_device(dev); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { - found = 1; + get_device(dev); break; - } else - put_device(dev); + } } - mutex_unlock(&class->p->class_mutex); + class_dev_iter_exit(&iter); - return found ? 
dev : NULL; + return dev; } EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { struct class *parent; + struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) @@ -391,8 +456,10 @@ int class_interface_register(struct class_interface *class_intf) mutex_lock(&parent->p->class_mutex); list_add_tail(&class_intf->node, &parent->p->class_interfaces); if (class_intf->add_dev) { - list_for_each_entry(dev, &parent->p->class_devices, node) + class_dev_iter_init(&iter, parent, NULL, NULL); + while ((dev = class_dev_iter_next(&iter))) class_intf->add_dev(dev, class_intf); + class_dev_iter_exit(&iter); } mutex_unlock(&parent->p->class_mutex); @@ -402,6 +469,7 @@ int class_interface_register(struct class_interface *class_intf) void class_interface_unregister(struct class_interface *class_intf) { struct class *parent = class_intf->class; + struct class_dev_iter iter; struct device *dev; if (!parent) @@ -410,8 +478,10 @@ void class_interface_unregister(struct class_interface *class_intf) mutex_lock(&parent->p->class_mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { - list_for_each_entry(dev, &parent->p->class_devices, node) + class_dev_iter_init(&iter, parent, NULL, NULL); + while ((dev = class_dev_iter_next(&iter))) class_intf->remove_dev(dev, class_intf); + class_dev_iter_exit(&iter); } mutex_unlock(&parent->p->class_mutex); diff --git a/drivers/base/core.c b/drivers/base/core.c index d021c98605b..b98cb1416a2 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -536,7 +536,6 @@ void device_initialize(struct device *dev) klist_init(&dev->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->dma_pools); - INIT_LIST_HEAD(&dev->node); init_MUTEX(&dev->sem); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); @@ -916,7 +915,8 @@ int device_add(struct device *dev) if (dev->class) { mutex_lock(&dev->class->p->class_mutex); /* tie 
the class to the device */ - list_add_tail(&dev->node, &dev->class->p->class_devices); + klist_add_tail(&dev->knode_class, + &dev->class->p->class_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, @@ -1032,7 +1032,7 @@ void device_del(struct device *dev) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ - list_del_init(&dev->node); + klist_del(&dev->knode_class); mutex_unlock(&dev->class->p->class_mutex); } device_remove_file(dev, &uevent_attr); -- cgit v1.2.3 From 310a2c1012934f590192377f65940cad4aa72b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:17 +0900 Subject: block: misc updates This patch makes the following misc updates in preparation for disk->part dereference fix and extended block devt support. * implement part_to_disk() * fix comment about gendisk->part indexing * rename get_part() to disk_map_sector() * don't use n which is always zero while printing disk information in diskstats_show() Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f1746295d0..885d1409521 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,7 +757,7 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = get_part(disk, sector); + part = disk_map_sector(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); -- cgit v1.2.3 From f331c0296f2a9fee0d396a70598b954062603015 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:01:48 +0200 Subject: block: don't depend on consecutive minor space * Implement disk_devt() and part_devt()
and use them to directly access devt instead of computing it from ->major and ->first_minor. Note that all references to ->major and ->first_minor outside of block layer are used to determine devt of the disk (the part0) and as ->major and ->first_minor will continue to represent devt for the disk, converting these users isn't strictly necessary. However, convert them for consistency. * Implement disk_max_parts() to avoid directly dereferencing genhd->minors. * Update bdget_disk() such that it doesn't assume consecutive minor space. * Move devt computation from register_disk() to add_disk() and make it the only one (all other usages use the initially determined value). These changes clean up the code and will help disk->part dereference fix and extended block device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/char/random.c | 6 +++--- drivers/md/dm-ioctl.c | 4 ++-- drivers/md/dm-stripe.c | 4 ++-- drivers/md/dm.c | 7 ++++--- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/card/block.c | 2 +- drivers/s390/block/dasd_proc.c | 3 ++- drivers/s390/block/dcssblk.c | 4 ++-- drivers/scsi/sr.c | 2 +- 11 files changed, 20 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 29b7a648cc6..e1a90bbb474 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2911,7 +2911,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) if (!disk->queue) goto out_mem2; - pd->pkt_dev = MKDEV(disk->major, disk->first_minor); + pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) goto out_new_dev; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 4b0d6c7f4c6..936466f62af 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -541,7 +541,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv = dev->sbd.core.driver_data;
mutex_lock(&ps3disk_mask_mutex); - __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, + __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); diff --git a/drivers/char/random.c b/drivers/char/random.c index 7ce1ac4baa6..6af435b8986 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -661,10 +661,10 @@ void add_disk_randomness(struct gendisk *disk) if (!disk || !disk->random) return; /* first major is 1, so we get >= 0x200 here */ - DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); + DEBUG_ENT("disk event %d:%d\n", + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk))); - add_timer_randomness(disk->random, - 0x100 + MKDEV(disk->major, disk->first_minor)); + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c0042de..c3de311117a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + nl->dev = huge_encode_dev(disk_devt(disk)); nl->next = 0; strcpy(nl->name, hc->name); @@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (dm_suspended(md)) param->flags |= DM_SUSPEND_FLAG; - param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + param->dev = huge_encode_dev(disk_devt(disk)); /* * Yes, this will be out of date by the time it gets back diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4de90ab3968..b745d8ac625 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, memset(major_minor, 0, sizeof(major_minor)); sprintf(major_minor, "%d:%d", - 
bio->bi_bdev->bd_disk->major, - bio->bi_bdev->bd_disk->first_minor); + MAJOR(disk_devt(bio->bi_bdev->bd_disk)), + MINOR(disk_devt(bio->bi_bdev->bd_disk))); /* * Test to see which stripe drive triggered the event diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ace998ce59f..a78caad2999 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1146,7 +1146,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { - int minor = md->disk->first_minor; + int minor = MINOR(disk_devt(md->disk)); if (md->suspended_bdev) { unlock_fs(md); @@ -1267,7 +1267,7 @@ static struct mapped_device *dm_find_md(dev_t dev) md = idr_find(&_minor_idr, minor); if (md && (md == MINOR_ALLOCED || - (dm_disk(md)->first_minor != minor) || + (MINOR(disk_devt(dm_disk(md))) != minor) || test_bit(DMF_FREEING, &md->flags))) { md = NULL; goto out; @@ -1318,7 +1318,8 @@ void dm_put(struct mapped_device *md) if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { map = dm_get_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); + idr_replace(&_minor_idr, MINOR_ALLOCED, + MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); if (!dm_suspended(md)) { diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index d2d2318dafa..82bf649ef13 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -197,7 +197,7 @@ static int mspro_block_bd_open(struct inode *inode, struct file *filp) static int mspro_block_disk_release(struct gendisk *disk) { struct mspro_block_data *msb = disk->private_data; - int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; + int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; mutex_lock(&mspro_block_disk_lock); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ebc8b9d7761..97156b689e8 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -83,7 +83,7 @@ 
static void mmc_blk_put(struct mmc_blk_data *md) mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devidx = md->disk->first_minor >> MMC_SHIFT; + int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; __clear_bit(devidx, dev_use); put_disk(md->disk); diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 03c0e40a92f..e3b5c4d3036 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -76,7 +76,8 @@ dasd_devices_show(struct seq_file *m, void *v) /* Print kdev. */ if (block->gdp) seq_printf(m, " at (%3d:%6d)", - block->gdp->major, block->gdp->first_minor); + MAJOR(disk_devt(block->gdp)), + MINOR(disk_devt(block->gdp))); else seq_printf(m, " at (???:??????)"); /* Print device name. */ diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 711b3004b3e..9481e4a3f76 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -114,7 +114,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info) found = 0; // test if minor available list_for_each_entry(entry, &dcssblk_devices, lh) - if (minor == entry->gd->first_minor) + if (minor == MINOR(disk_devt(entry->gd))) found++; if (!found) break; // got unused minor } @@ -397,7 +397,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char goto unload_seg; } sprintf(dev_info->gd->disk_name, "dcssblk%d", - dev_info->gd->first_minor); + MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 27f5bfd1def..8dbe3798d5f 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -878,7 +878,7 @@ static void sr_kref_release(struct kref *kref) struct gendisk *disk = cd->disk; spin_lock(&sr_index_lock); - clear_bit(disk->first_minor, sr_index_bits); + clear_bit(MINOR(disk_devt(disk)), sr_index_bits); spin_unlock(&sr_index_lock); unregister_cdrom(&cd->cdi); -- cgit v1.2.3 From 
e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. 
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 885d1409521..84c03d65dcc 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,11 +757,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = disk_map_sector(disk, sector); + rcu_read_lock(); + + part = disk_map_sector_rcu(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); all_stat_add(disk, part, io_ticks, duration, sector); + + rcu_read_unlock(); } void -- cgit v1.2.3 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemption is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disable preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted. disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now have an @cpu argument to help RT.
This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 15 ++++++++------- drivers/md/dm.c | 26 +++++++++++++++----------- drivers/md/linear.c | 7 +++++-- drivers/md/multipath.c | 7 +++++-- drivers/md/raid0.c | 7 +++++-- drivers/md/raid1.c | 8 +++++--- drivers/md/raid10.c | 7 +++++-- drivers/md/raid5.c | 8 +++++--- 8 files changed, 53 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 84c03d65dcc..17eed8c025d 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -756,16 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(disk, part, ios[rw], sector); - all_stat_add(disk, part, ticks[rw], duration, sector); - all_stat_add(disk, part, sectors[rw], n_sect, sector); - all_stat_add(disk, part, io_ticks, duration, sector); - rcu_read_unlock(); + all_stat_inc(cpu, disk, part, ios[rw], sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, sector); + all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); + all_stat_add(cpu, disk, part, io_ticks, duration, sector); + + disk_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a78caad2999..653624792ea 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -377,12 +377,13 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; + int cpu; io->start_time = jiffies; - 
preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_unlock(); dm_disk(md)->in_flight = atomic_inc_return(&md->pending); } @@ -391,15 +392,15 @@ static int end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending; + int pending, cpu; int rw = bio_data_dir(bio); - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + cpu = disk_stat_lock(); + disk_round_stats(cpu, dm_disk(md)); + disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); + disk_stat_unlock(); - disk_stat_add(dm_disk(md), ticks[rw], duration); + dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); return !pending; } @@ -885,6 +886,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + int cpu; /* * There is no use in forwarding any barrier request since we can't @@ -897,8 +899,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - disk_stat_inc(dm_disk(md), ios[rw]); - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, dm_disk(md), ios[rw]); + disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1eebf88c20..00cbc8e4729 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -318,14 +318,17 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; sector_t block; + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - 
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c4779ccba1c..182f5a94cdc 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -147,6 +147,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -158,8 +159,10 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 18361063566..e26030fa59a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,14 +399,17 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) sector_t chunk; sector_t block, rsect; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 
0b82030c265..babb13036f9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -779,7 +779,7 @@ static int make_request(struct request_queue *q, struct bio * bio) struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); const int do_sync = bio_sync(bio); - int do_barriers; + int cpu, do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -804,8 +804,10 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d3b9aa09628..5ec80da0a9d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -789,6 +789,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; + int cpu; int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); @@ -843,8 +844,10 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); + disk_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 37e546528f9..5899f211515 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3387,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; const int rw = bio_data_dir(bi); - int remaining; + int cpu, remaining; if 
(unlikely(bio_barrier(bi))) { bio_endio(bi, -EOPNOTSUPP); @@ -3396,8 +3396,10 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); + cpu = disk_stat_lock(); + disk_stat_inc(cpu, mddev->gendisk, ios[rw]); + disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); + disk_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && -- cgit v1.2.3 From f615b48cc7df7cac3865ec76ac1a5bb04d3e07f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:24 +0900 Subject: sd/ide-disk: apply extended minors to sd and ide Update sd and ide-disk such that they can take advantage of extended minors. ide-disk already has 64 minors per device and currently doesn't use extended minors although after this patch it can be turned on by simply tweaking constants. sd only had 16 minors per device causing problems on certain peculiar configurations. This patch lifts the restriction and enables it to use up to 64 minors.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 11 ++++++++--- drivers/scsi/sd.c | 9 +++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 07ef88bd109..7a88de9ada2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -41,6 +41,10 @@ #include #include +#define IDE_DISK_PARTS (1 << PARTN_BITS) +#define IDE_DISK_MINORS IDE_DISK_PARTS +#define IDE_DISK_EXT_MINORS (IDE_DISK_PARTS - IDE_DISK_MINORS) + struct ide_disk_obj { ide_drive_t *drive; ide_driver_t *driver; @@ -1151,8 +1155,8 @@ static int ide_disk_probe(ide_drive_t *drive) if (!idkp) goto failed; - g = alloc_disk_node(1 << PARTN_BITS, - hwif_to_node(drive->hwif)); + g = alloc_disk_ext_node(IDE_DISK_MINORS, IDE_DISK_EXT_MINORS, + hwif_to_node(drive->hwif)); if (!g) goto out_free_idkp; @@ -1178,7 +1182,8 @@ static int ide_disk_probe(ide_drive_t *drive) } else drive->attach = 1; - g->minors = 1 << PARTN_BITS; + g->minors = IDE_DISK_MINORS; + g->ext_minors = IDE_DISK_EXT_MINORS; g->driverfs_dev = &drive->gendev; g->flags = drive->removable ? 
GENHD_FL_REMOVABLE : 0; set_capacity(g, idedisk_capacity(drive)); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e5e7d785645..d1bb0e1d2d2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -86,6 +86,10 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); +#define SD_PARTS 64 +#define SD_MINORS 16 +#define SD_EXT_MINORS (SD_PARTS - SD_MINORS) + static int sd_revalidate_disk(struct gendisk *); static int sd_probe(struct device *); static int sd_remove(struct device *); @@ -1801,7 +1805,7 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - gd = alloc_disk(16); + gd = alloc_disk_ext(SD_MINORS, SD_EXT_MINORS); if (!gd) goto out_free; @@ -1845,7 +1849,8 @@ static int sd_probe(struct device *dev) gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = 16; + gd->minors = SD_MINORS; + gd->ext_minors = SD_EXT_MINORS; gd->fops = &sd_fops; if (index < 26) { -- cgit v1.2.3 From 870d6656126add8e383645732b03df2b7ccd4f94 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:25 +0900 Subject: block: implement CONFIG_DEBUG_BLOCK_EXT_DEVT Extended devt introduces non-contiguous device numbers. This patch implements a debug option which forces most devt allocations to be from the extended area and spreads them out. This is enabled by default if DEBUG_KERNEL is set and achieves... 1. Detects code paths in kernel or userland which expect predetermined consecutive device numbers. 2. When something goes wrong, avoid corruption as adding to the minor of earlier partition won't lead to the wrong but valid device.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 6 ++++++ drivers/scsi/sd.c | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'drivers') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7a88de9ada2..a072df5053a 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -42,7 +42,13 @@ #include #define IDE_DISK_PARTS (1 << PARTN_BITS) + +#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define IDE_DISK_MINORS IDE_DISK_PARTS +#else +#define IDE_DISK_MINORS 1 +#endif + #define IDE_DISK_EXT_MINORS (IDE_DISK_PARTS - IDE_DISK_MINORS) struct ide_disk_obj { diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d1bb0e1d2d2..280d231a86e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -87,7 +87,13 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); #define SD_PARTS 64 + +#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 +#else +#define SD_MINORS 1 +#endif + #define SD_EXT_MINORS (SD_PARTS - SD_MINORS) static int sd_revalidate_disk(struct gendisk *); -- cgit v1.2.3 From ed9e1982347b36573cd622ee5f4e2a7ccd79b3fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:05 +0900 Subject: block: implement and use {disk|part}_to_dev() Implement {disk|part}_to_dev() and use them to access generic device instead of directly dereferencing {disk|part}->dev. To make sure no user is left behind, rename generic devices fields to __dev. This is in preparation of unifying partition 0 handling with other partitions. 
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 4 ++-- drivers/block/nbd.c | 4 ++-- drivers/ide/ide-probe.c | 2 +- drivers/md/dm.c | 4 ++-- drivers/md/md.c | 10 +++++----- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0c39782b266..3edb6cb7d68 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -109,12 +109,12 @@ static const struct attribute_group attr_group = { static int aoedisk_add_sysfs(struct aoedev *d) { - return sysfs_create_group(&d->gd->dev.kobj, &attr_group); + return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); } void aoedisk_rm_sysfs(struct aoedev *d) { - sysfs_remove_group(&d->gd->dev.kobj, &attr_group); + sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); } static int diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1778e4a2c67..7b3351260d5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); lo->pid = current->pid; - ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); + ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); if (ret) { printk(KERN_ERR "nbd: sysfs_create_file failed!"); return ret; @@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo) while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); - sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); + sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); return 0; } diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index a51a30e9eab..70aa86c8807 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1188,7 +1188,7 @@ static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct gendisk *p = data; *part &= (1 << PARTN_BITS) - 1; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int 
exact_lock(dev_t dev, void *data) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 653624792ea..637806695bb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1186,7 +1186,7 @@ static void event_callback(void *context) list_splice_init(&md->uevent_list, &uevents); spin_unlock_irqrestore(&md->uevent_lock, flags); - dm_send_uevents(&uevents, &md->disk->dev.kobj); + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); wake_up(&md->eventq); @@ -1643,7 +1643,7 @@ out: *---------------------------------------------------------------*/ void dm_kobject_uevent(struct mapped_device *md) { - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); } uint32_t dm_next_uevent_seq(struct mapped_device *md) diff --git a/drivers/md/md.c b/drivers/md/md.c index deeac4b4417..96e9fccd2ea 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1465,9 +1465,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) goto fail; if (rdev->bdev->bd_part) - ko = &rdev->bdev->bd_part->dev.kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; else - ko = &rdev->bdev->bd_disk->dev.kobj; + ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; @@ -3470,8 +3470,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, - "%s", "md"); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, + &disk_to_dev(disk)->kobj, "%s", "md"); mutex_unlock(&disks_mutex); if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", @@ -3761,7 +3761,7 @@ static int do_md_run(mddev_t * mddev) sysfs_notify(&mddev->kobj, NULL, "array_state"); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); - 
kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; } -- cgit v1.2.3 From 80795aefb76d10c5d698e60c7e7750b5330787da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:07 +0900 Subject: block: move capacity from disk to part0 Move disk->capacity to part0->nr_sects and convert all users who directly accessed the field to use {get|set}_capacity(). This is done early to allow the __dev field to be moved. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/aoe/aoecmd.c | 4 ++-- drivers/block/aoe/aoedev.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 3edb6cb7d68..aa69556c348 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp) gd->first_minor = d->sysminor * AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - gd->capacity = d->ssize; + set_capacity(gd, d->ssize); snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 17eed8c025d..934800f979c 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work) unsigned long flags; u64 ssize; - ssize = d->gd->capacity; + ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { @@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; if (d->gd != NULL) { - d->gd->capacity = ssize; + set_capacity(d->gd, ssize); d->flags |= DEVFL_NEWSIZE; } else d->flags |= DEVFL_GDALLOC; diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index a1d813ab0d6..6a8038d115b 100644 --- a/drivers/block/aoe/aoedev.c +++ 
b/drivers/block/aoe/aoedev.c @@ -91,7 +91,7 @@ aoedev_downdev(struct aoedev *d) } if (d->gd) - d->gd->capacity = 0; + set_capacity(d->gd, 0); d->flags &= ~DEVFL_UP; } -- cgit v1.2.3 From b7db9956e57c8151b930d5e5fe5c766e6aad3ff7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:10 +0900 Subject: block: move policy from disk to part0 Move disk->policy to part0->policy. Implement and use get_disk_ro(). Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/ide/ide-cd.c | 2 +- drivers/md/dm-ioctl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index f16bb466723..03c2cb6a58b 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1113,7 +1113,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) if (write) { /* disk has become write protected */ - if (cd->disk->policy) { + if (get_disk_ro(cd->disk)) { cdrom_end_request(drive, 0); return ide_stopped; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c3de311117a..5b919159f08 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -548,7 +548,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) */ param->open_count = dm_open_count(md); - if (disk->policy) + if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; param->event_nr = dm_get_event_nr(md); -- cgit v1.2.3 From 0762b8bde9729f10f8e6249809660ff2ec3ad735 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:12 +0900 Subject: block: always set bdev->bd_part Till now, bdev->bd_part is set only if the bdev was for parts other than part0. This patch makes bdev->bd_part always set so that code paths don't have to differentiate common handling.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/md/md.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/md/md.c b/drivers/md/md.c index 96e9fccd2ea..2bd9cf41612 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1464,10 +1464,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - if (rdev->bdev->bd_part) - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; - else - ko = &disk_to_dev(rdev->bdev->bd_disk)->kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; -- cgit v1.2.3 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. i.e. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similarly. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_flight() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock(). While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 12 ++++++------ drivers/md/dm.c | 27 ++++++++++++++------------- drivers/md/linear.c | 9 +++++---- drivers/md/md.c | 4 ++-- drivers/md/multipath.c | 9 +++++---- drivers/md/raid0.c | 9 +++++---- drivers/md/raid1.c | 9 +++++---- drivers/md/raid10.c | 9 +++++---- drivers/md/raid5.c | 9 +++++---- 9 files changed, 52 insertions(+), 45 deletions(-) (limited to 'drivers') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 934800f979c..961d29a53ca 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -758,15 +758,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(cpu, disk, part, ios[rw], sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, sector); - all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); - all_stat_add(cpu, disk, part, io_ticks, duration, sector); + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, sectors[rw], n_sect); + part_stat_add(cpu, part, io_ticks, duration); - disk_stat_unlock(); + part_stat_unlock(); } void diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 637806695bb..327de03a5bd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -381,10 +381,10 @@ static void start_io_acct(struct dm_io *io) io->start_time = jiffies; - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_unlock(); - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_unlock(); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static int end_io_acct(struct dm_io *io) @@ -395,12 +395,13 @@ static int end_io_acct(struct dm_io *io) int pending, cpu; int rw = 
bio_data_dir(bio); - cpu = disk_stat_lock(); - disk_round_stats(cpu, dm_disk(md)); - disk_stat_add(cpu, dm_disk(md), ticks[rw], duration); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); + part_stat_unlock(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); return !pending; } @@ -899,10 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, dm_disk(md), ios[rw]); - disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); /* * If we're suspended we have to queue diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 00cbc8e4729..c80ea90593d 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -325,10 +325,11 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; diff --git a/drivers/md/md.c b/drivers/md/md.c index 2bd9cf41612..0a3a4bdcd4a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5546,8 +5546,8 @@ static int is_mddev_idle(mddev_t *mddev) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - 
disk_stat_read(disk, sectors[1]) - + curr_events = part_stat_read(&disk->part0, sectors[0]) + + part_stat_read(&disk->part0, sectors[1]) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 182f5a94cdc..8bb8794129b 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -159,10 +159,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e26030fa59a..f52f442a735 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -406,10 +406,11 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) return 0; } - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index babb13036f9..b9764429d85 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -804,10 +804,11 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - 
disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); /* * make_request() can abort the operation when READA is being diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5ec80da0a9d..5f990133f5e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -844,10 +844,11 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5899f211515..ae16794bef2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3396,10 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - cpu = disk_stat_lock(); - disk_stat_inc(cpu, mddev->gendisk, ios[rw]); - disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi)); - disk_stat_unlock(); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bi)); + part_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && -- cgit v1.2.3 From 689d6fac40b41c7bf154f362deaf442548e4dc81 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:16 +0900 Subject: block: replace @ext_minors with GENHD_FL_EXT_DEVT With previous changes, it's meaningless to limit the number of partitions. 
Replace @ext_minors with GENHD_FL_EXT_DEVT such that setting the flag allows the disk to have maximum number of allowed partitions (only limited by the number of entries in parsed_partitions as determined by MAX_PART constant). This kills not-too-pretty alloc_disk_ext[_node]() functions and makes @minors parameter to alloc_disk[_node]() unnecessary. The parameter is left alone to avoid disturbing the users. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 14 +++++--------- drivers/scsi/sd.c | 9 ++------- 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a072df5053a..29c8ae75268 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -41,16 +41,12 @@ #include #include -#define IDE_DISK_PARTS (1 << PARTN_BITS) - #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) -#define IDE_DISK_MINORS IDE_DISK_PARTS +#define IDE_DISK_MINORS (1 << PARTN_BITS) #else #define IDE_DISK_MINORS 1 #endif -#define IDE_DISK_EXT_MINORS (IDE_DISK_PARTS - IDE_DISK_MINORS) - struct ide_disk_obj { ide_drive_t *drive; ide_driver_t *driver; @@ -1161,8 +1157,7 @@ static int ide_disk_probe(ide_drive_t *drive) if (!idkp) goto failed; - g = alloc_disk_ext_node(IDE_DISK_MINORS, IDE_DISK_EXT_MINORS, - hwif_to_node(drive->hwif)); + g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif)); if (!g) goto out_free_idkp; @@ -1189,9 +1184,10 @@ static int ide_disk_probe(ide_drive_t *drive) drive->attach = 1; g->minors = IDE_DISK_MINORS; - g->ext_minors = IDE_DISK_EXT_MINORS; g->driverfs_dev = &drive->gendev; - g->flags = drive->removable ? 
GENHD_FL_REMOVABLE : 0; + g->flags |= GENHD_FL_EXT_DEVT; + if (drive->removable) + g->flags |= GENHD_FL_REMOVABLE; set_capacity(g, idedisk_capacity(drive)); g->fops = &idedisk_ops; add_disk(g); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 280d231a86e..6598024531d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -86,16 +86,12 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); -#define SD_PARTS 64 - #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 #else #define SD_MINORS 1 #endif -#define SD_EXT_MINORS (SD_PARTS - SD_MINORS) - static int sd_revalidate_disk(struct gendisk *); static int sd_probe(struct device *); static int sd_remove(struct device *); @@ -1811,7 +1807,7 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - gd = alloc_disk_ext(SD_MINORS, SD_EXT_MINORS); + gd = alloc_disk(SD_MINORS); if (!gd) goto out_free; @@ -1856,7 +1852,6 @@ static int sd_probe(struct device *dev) gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); gd->minors = SD_MINORS; - gd->ext_minors = SD_EXT_MINORS; gd->fops = &sd_fops; if (index < 26) { @@ -1880,7 +1875,7 @@ static int sd_probe(struct device *dev) blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); gd->driverfs_dev = &sdp->sdev_gendev; - gd->flags = GENHD_FL_DRIVERFS; + gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; if (sdp->removable) gd->flags |= GENHD_FL_REMOVABLE; -- cgit v1.2.3 From 3e1a7ff8a0a7b948f2684930166954f9e8e776fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:17 +0900 Subject: block: allow disk to have extended device number Now that disk and partition handlings are mostly unified, it's easy to allow disk to have extended device number. This patch makes add_disk() use extended device number if disk->minors is zero. Both sd and ide-disk are updated to use this. 
* sd_format_disk_name() is implemented which can generically determine the drive name. This removes disk number restriction stemming from limited device names. * If sd index goes over SD_MAX_DISKS (which can be increased now BTW), sd simply doesn't initialize minors letting block layer choose extended device number. * If CONFIG_DEBUG_EXT_DEVT is set, both sd and ide-disk always set minors to 0 and use extended device numbers. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/ide/ide-disk.c | 2 +- drivers/scsi/sd.c | 74 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 29c8ae75268..33ea8c04871 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -44,7 +44,7 @@ #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define IDE_DISK_MINORS (1 << PARTN_BITS) #else -#define IDE_DISK_MINORS 1 +#define IDE_DISK_MINORS 0 #endif struct ide_disk_obj { diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6598024531d..bcb04b2a767 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -89,7 +89,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); #if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) #define SD_MINORS 16 #else -#define SD_MINORS 1 +#define SD_MINORS 0 #endif static int sd_revalidate_disk(struct gendisk *); @@ -1769,6 +1769,52 @@ static int sd_revalidate_disk(struct gendisk *disk) return 0; } +/** + * sd_format_disk_name - format disk name + * @prefix: name prefix - ie. "sd" for SCSI disks + * @index: index of the disk to format name for + * @buf: output buffer + * @buflen: length of the output buffer + * + * SCSI disk names starts at sda. The 26th device is sdz and the + * 27th is sdaa. The last one for two lettered suffix is sdzz + * which is followed by sdaaa. 
+ * + * This is basically 26 base counting with one extra 'nil' entry + * at the beginning from the second digit on and can be + * determined using similar method as 26 base conversion with the + * index shifted -1 after each digit is computed. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) +{ + const int base = 'z' - 'a' + 1; + char *begin = buf + strlen(prefix); + char *end = buf + buflen; + char *p; + int unit; + + p = end - 1; + *p = '\0'; + unit = base; + do { + if (p == begin) + return -EINVAL; + *--p = 'a' + (index % unit); + index = (index / unit) - 1; + } while (index >= 0); + + memmove(begin, p, end - p); + memcpy(buf, prefix, strlen(prefix)); + + return 0; +} + /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -1821,8 +1867,8 @@ static int sd_probe(struct device *dev) if (error) goto out_put; - error = -EBUSY; - if (index >= SD_MAX_DISKS) + error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); + if (error) goto out_free_index; sdkp->device = sdp; @@ -1849,24 +1895,12 @@ static int sd_probe(struct device *dev) get_device(&sdp->sdev_gendev); - gd->major = sd_major((index & 0xf0) >> 4); - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = SD_MINORS; - gd->fops = &sd_fops; - - if (index < 26) { - sprintf(gd->disk_name, "sd%c", 'a' + index % 26); - } else if (index < (26 + 1) * 26) { - sprintf(gd->disk_name, "sd%c%c", - 'a' + index / 26 - 1,'a' + index % 26); - } else { - const unsigned int m1 = (index / 26 - 1) / 26 - 1; - const unsigned int m2 = (index / 26 - 1) % 26; - const unsigned int m3 = index % 26; - sprintf(gd->disk_name, "sd%c%c%c", - 'a' + m1, 'a' + m2, 'a' + m3); + if (index < SD_MAX_DISKS) { + gd->major = sd_major((index & 0xf0) >> 4); + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); + 
gd->minors = SD_MINORS; } - + gd->fops = &sd_fops; gd->private_data = &sdkp->driver; gd->queue = sdkp->device->request_queue; -- cgit v1.2.3 From a3bce90edd8f6cafe3f63b1a943800792e830178 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:05 +0900 Subject: block: add gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov Currently, blk_rq_map_user and blk_rq_map_user_iov always do GFP_KERNEL allocation. This adds gfp_mask argument to blk_rq_map_user and blk_rq_map_user_iov so sg can use it (sg always does GFP_ATOMIC allocation). Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_tgt_lib.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 74031de517e..e861d24a6d3 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len); + ret = blk_rq_map_user(q, rq, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 257e097c39a..2a4fd820d61 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len); + err = blk_rq_map_user(q, rq, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, -- cgit v1.2.3 From 152e283fdfea0cd11e297d982378b55937842dde Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:06 +0900 Subject: block: introduce struct rq_map_data to use reserved pages This patch introduces struct rq_map_data to enable 
bio_copy_user_iov() to use reserved pages. Currently, bio_copy_user_iov allocates bounce pages but drivers/scsi/sg.c wants to allocate pages by itself and use them. struct rq_map_data can be used to pass allocated pages to bio_copy_user_iov. The current users of bio_copy_user_iov simply pass NULL (they don't want to use pre-allocated pages). Signed-off-by: FUJITA Tomonori Cc: Jens Axboe Cc: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 2 +- drivers/scsi/scsi_tgt_lib.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index e861d24a6d3..d47f2f80acc 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len, GFP_KERNEL); + ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 2a4fd820d61..3117bb106b5 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len, GFP_KERNEL); + err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, -- cgit v1.2.3 From 10865dfa34e7552c4c64606edcdf1e21a110c985 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:07 +0900 Subject: sg: convert the non-data path to use the block layer This patch converts the non data path to use the block layer functions (blk_get_request, blk_execute_rq_nowait, etc) instead of using scsi_execute_async(). 
Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 661f9f21650..487c7776cc4 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -137,6 +137,7 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ char orphan; /* 1 -> drop on sight, 0 -> normal */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ volatile char done; /* 0->before bh, 1->before read, 2->read */ + struct request *rq; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ @@ -176,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ static int sg_fasync(int fd, struct file *filp, int mode); /* tasklet or soft irq callback */ static void sg_cmd_done(void *data, char *sense, int result, int resid); -static int sg_start_req(Sg_request * srp); +static int sg_start_req(Sg_request *srp, unsigned char *cmd); static void sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, @@ -229,6 +230,11 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd) cmd, filp->f_mode & FMODE_WRITE); } +static void sg_rq_end_io(struct request *rq, int uptodate) +{ + sg_cmd_done(rq->end_io_data, rq->sense, rq->errors, rq->data_len); +} + static int sg_open(struct inode *inode, struct file *filp) { @@ -732,7 +738,8 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, SCSI_LOG_TIMEOUT(4, printk("sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if ((k = sg_start_req(srp))) { + k = sg_start_req(srp, cmnd); + if (k) { SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", 
k)); sg_finish_rem_req(srp); return k; /* probably out of space --> ENOMEM */ @@ -765,6 +772,12 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, hp->duration = jiffies_to_msecs(jiffies); /* Now send everything of to mid-level. The next time we hear about this packet is when sg_cmd_done() is called (i.e. a callback). */ + if (srp->rq) { + srp->rq->timeout = timeout; + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, + srp->rq, 1, sg_rq_end_io); + return 0; + } if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, hp->dxfer_len, srp->data.k_use_sg, timeout, SG_DEFAULT_RETRIES, srp, sg_cmd_done, @@ -1634,8 +1647,32 @@ exit_sg(void) idr_destroy(&sg_index_idr); } -static int -sg_start_req(Sg_request * srp) +static int __sg_start_req(struct sg_request *srp, struct sg_io_hdr *hp, + unsigned char *cmd) +{ + struct sg_fd *sfp = srp->parentfp; + struct request_queue *q = sfp->parentdp->device->request_queue; + struct request *rq; + int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? 
WRITE : READ; + + rq = blk_get_request(q, rw, GFP_ATOMIC); + if (!rq) + return -ENOMEM; + + memcpy(rq->cmd, cmd, hp->cmd_len); + + rq->cmd_len = hp->cmd_len; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + + srp->rq = rq; + rq->end_io_data = srp; + rq->sense = srp->sense_b; + rq->retries = SG_DEFAULT_RETRIES; + + return 0; +} + +static int sg_start_req(Sg_request *srp, unsigned char *cmd) { int res; Sg_fd *sfp = srp->parentfp; @@ -1646,8 +1683,10 @@ sg_start_req(Sg_request * srp) Sg_scatter_hold *rsv_schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); + if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) - return 0; + return __sg_start_req(srp, hp, cmd); + if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma)) { @@ -1678,6 +1717,10 @@ sg_finish_rem_req(Sg_request * srp) sg_unlink_reserve(sfp, srp); else sg_remove_scat(req_schp); + + if (srp->rq) + blk_put_request(srp->rq); + sg_remove_request(sfp, srp); } -- cgit v1.2.3 From 6e5a30cba5e7c03b2cd564e968f1dd667a0f7c42 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 16:17:08 +0900 Subject: sg: convert the direct IO path to use the block layer This patch converts the direct IO path (SG_FLAG_DIRECT_IO) to use the block layer functions (blk_get_request, blk_execute_rq_nowait, blk_rq_map_user, etc) instead of scsi_execute_async(). 
Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 173 +++++++++--------------------------------------------- 1 file changed, 27 insertions(+), 146 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 487c7776cc4..cb6de0752ee 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -138,6 +138,7 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ volatile char done; /* 0->before bh, 1->before read, 2->read */ struct request *rq; + struct bio *bio; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ @@ -1679,21 +1680,29 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; + unsigned long uaddr = (unsigned long)hp->dxferp; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; + struct request_queue *q = sfp->parentdp->device->request_queue; + unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return __sg_start_req(srp, hp, cmd); +#ifdef SG_ALLOW_DIO_CODE if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && - (!sfp->parentdp->device->host->unchecked_isa_dma)) { - res = sg_build_direct(srp, sfp, dxfer_len); - if (res <= 0) /* -ve -> error, 0 -> done, 1 -> try indirect */ - return res; + (!sfp->parentdp->device->host->unchecked_isa_dma) && + !(uaddr & alignment) && !(dxfer_len & alignment)) { + res = __sg_start_req(srp, hp, cmd); + if (!res) + res = sg_build_direct(srp, sfp, dxfer_len); + + return res; } +#endif if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) 
sg_link_reserve(sfp, srp, dxfer_len); else { @@ -1718,8 +1727,11 @@ sg_finish_rem_req(Sg_request * srp) else sg_remove_scat(req_schp); - if (srp->rq) + if (srp->rq) { + if (srp->bio) + blk_rq_unmap_user(srp->bio); blk_put_request(srp->rq); + } sg_remove_request(sfp, srp); } @@ -1746,151 +1758,23 @@ sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) return tablesize; /* number of scat_gath elements allocated */ } -#ifdef SG_ALLOW_DIO_CODE -/* vvvvvvvv following code borrowed from st driver's direct IO vvvvvvvvv */ - /* TODO: hopefully we can use the generic block layer code */ - -/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if - - mapping of all pages not successful - (i.e., either completely successful or fails) -*/ -static int -st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, - unsigned long uaddr, size_t count, int rw) -{ - unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int res, i, j; - struct page **pages; - - /* User attempted Overflow! */ - if ((uaddr + count) < uaddr) - return -EINVAL; - - /* Too big */ - if (nr_pages > max_pages) - return -ENOMEM; - - /* Hmm? */ - if (count == 0) - return 0; - - if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL) - return -ENOMEM; - - /* Try to fault in all of the necessary pages */ - down_read(&current->mm->mmap_sem); - /* rw==READ means read from drive, write into memory area */ - res = get_user_pages( - current, - current->mm, - uaddr, - nr_pages, - rw == READ, - 0, /* don't force */ - pages, - NULL); - up_read(&current->mm->mmap_sem); - - /* Errors and no page mapped should return here */ - if (res < nr_pages) - goto out_unmap; - - for (i=0; i < nr_pages; i++) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(pages[i]); - /* ?? Is locking needed? 
I don't think so */ - /* if (!trylock_page(pages[i])) - goto out_unlock; */ - } - - sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK); - if (nr_pages > 1) { - sgl[0].length = PAGE_SIZE - sgl[0].offset; - count -= sgl[0].length; - for (i=1; i < nr_pages ; i++) - sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0); - } - else { - sgl[0].length = count; - } - - kfree(pages); - return nr_pages; - - out_unmap: - if (res > 0) { - for (j=0; j < res; j++) - page_cache_release(pages[j]); - res = 0; - } - kfree(pages); - return res; -} - - -/* And unmap them... */ -static int -st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages, - int dirtied) -{ - int i; - - for (i=0; i < nr_pages; i++) { - struct page *page = sg_page(&sgl[i]); - - if (dirtied) - SetPageDirty(page); - /* unlock_page(page); */ - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ - page_cache_release(page); - } - - return 0; -} - -/* ^^^^^^^^ above code borrowed from st driver's direct IO ^^^^^^^^^ */ -#endif - - /* Returns: -ve -> error, 0 -> done, 1 -> try indirect */ static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) { -#ifdef SG_ALLOW_DIO_CODE sg_io_hdr_t *hp = &srp->header; Sg_scatter_hold *schp = &srp->data; - int sg_tablesize = sfp->parentdp->sg_tablesize; - int mx_sc_elems, res; - struct scsi_device *sdev = sfp->parentdp->device; - - if (((unsigned long)hp->dxferp & - queue_dma_alignment(sdev->request_queue)) != 0) - return 1; + int res; + struct request *rq = srp->rq; + struct request_queue *q = sfp->parentdp->device->request_queue; - mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize); - if (mx_sc_elems <= 0) { - return 1; - } - res = st_map_user_pages(schp->buffer, mx_sc_elems, - (unsigned long)hp->dxferp, dxfer_len, - (SG_DXFER_TO_DEV == hp->dxfer_direction) ? 
1 : 0); - if (res <= 0) { - sg_remove_scat(schp); - return 1; - } - schp->k_use_sg = res; + res = blk_rq_map_user(q, rq, NULL, hp->dxferp, dxfer_len, GFP_ATOMIC); + if (res) + return res; + srp->bio = rq->bio; schp->dio_in_use = 1; hp->info |= SG_INFO_DIRECT_IO; return 0; -#else - return 1; -#endif } static int @@ -2069,11 +1953,7 @@ sg_remove_scat(Sg_scatter_hold * schp) if (schp->buffer && (schp->sglist_len > 0)) { struct scatterlist *sg = schp->buffer; - if (schp->dio_in_use) { -#ifdef SG_ALLOW_DIO_CODE - st_unmap_user_pages(sg, schp->k_use_sg, TRUE); -#endif - } else { + if (!schp->dio_in_use) { int k; for (k = 0; (k < schp->k_use_sg) && sg_page(sg); @@ -2083,8 +1963,9 @@ sg_remove_scat(Sg_scatter_hold * schp) k, sg_page(sg), sg->length)); sg_page_free(sg_page(sg), sg->length); } + + kfree(schp->buffer); } - kfree(schp->buffer); } memset(schp, 0, sizeof (*schp)); } -- cgit v1.2.3 From 10db10d144c0248f285242f79daf6b9de6b00a62 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 29 Aug 2008 12:32:18 +0200 Subject: sg: convert the indirect IO path to use the block layer This patch converts the indirect IO path (including mmap IO and old struct sg_header) to use the block layer functions (blk_get_request, blk_execute_rq_nowait, blk_rq_map_user, etc) instead of scsi_execute_async(). 
[Jens: fixed compile error with SCSI logging enabled] Signed-off-by: FUJITA Tomonori Signed-off-by: Douglas Gilbert Cc: Mike Christie Cc: James Bottomley Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 397 +++++++++++++++--------------------------------------- 1 file changed, 105 insertions(+), 292 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cb6de0752ee..d6391666502 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -47,7 +47,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #include #include #include -#include #include #include @@ -119,7 +118,8 @@ typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ unsigned b_malloc_len; /* actual len malloc'ed in buffer */ - struct scatterlist *buffer;/* scatter list */ + struct page **pages; + int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ unsigned char cmd_opcode; /* first byte of command */ } Sg_scatter_hold; @@ -190,8 +190,6 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, - int wr_xf, int *countp, unsigned char __user **up); static int sg_write_xfer(Sg_request * srp); static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); @@ -199,8 +197,6 @@ static void sg_remove_scat(Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); -static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp); -static void sg_page_free(struct page 
*page, int size); static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); @@ -771,26 +767,11 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, break; } hp->duration = jiffies_to_msecs(jiffies); -/* Now send everything of to mid-level. The next time we hear about this - packet is when sg_cmd_done() is called (i.e. a callback). */ - if (srp->rq) { - srp->rq->timeout = timeout; - blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, - srp->rq, 1, sg_rq_end_io); - return 0; - } - if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, - hp->dxfer_len, srp->data.k_use_sg, timeout, - SG_DEFAULT_RETRIES, srp, sg_cmd_done, - GFP_ATOMIC)) { - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n")); - /* - * most likely out of mem, but could also be a bad map - */ - sg_finish_rem_req(srp); - return -ENOMEM; - } else - return 0; + + srp->rq->timeout = timeout; + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, + srp->rq, 1, sg_rq_end_io); + return 0; } static int @@ -1206,8 +1187,7 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) Sg_fd *sfp; unsigned long offset, len, sa; Sg_scatter_hold *rsv_schp; - struct scatterlist *sg; - int k; + int k, length; if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) return VM_FAULT_SIGBUS; @@ -1217,15 +1197,14 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n", offset, rsv_schp->k_use_sg)); - sg = rsv_schp->buffer; sa = vma->vm_start; - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, sg = sg_next(sg)) { + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; - len = (len < sg->length) ? len : sg->length; + len = (len < length) ? 
len : length; if (offset < len) { - struct page *page; - page = virt_to_page(page_address(sg_page(sg)) + offset); + struct page *page = nth_page(rsv_schp->pages[k], + offset >> PAGE_SHIFT); get_page(page); /* increment page count */ vmf->page = page; return 0; /* success */ @@ -1247,8 +1226,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) Sg_fd *sfp; unsigned long req_sz, len, sa; Sg_scatter_hold *rsv_schp; - int k; - struct scatterlist *sg; + int k, length; if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) return -ENXIO; @@ -1262,11 +1240,10 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return -ENOMEM; /* cannot map more than reserved buffer */ sa = vma->vm_start; - sg = rsv_schp->buffer; - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, sg = sg_next(sg)) { + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; - len = (len < sg->length) ? len : sg->length; + len = (len < length) ? 
len : length; sa += len; } @@ -1310,7 +1287,6 @@ sg_cmd_done(void *data, char *sense, int result, int resid) if (0 != result) { struct scsi_sense_hdr sshdr; - memcpy(srp->sense_b, sense, sizeof (srp->sense_b)); srp->header.status = 0xff & result; srp->header.masked_status = status_byte(result); srp->header.msg_status = msg_byte(result); @@ -1685,34 +1661,51 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask; + struct rq_map_data map_data; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); + res = __sg_start_req(srp, hp, cmd); + if (res) + return res; + if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) - return __sg_start_req(srp, hp, cmd); + return 0; #ifdef SG_ALLOW_DIO_CODE if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma) && - !(uaddr & alignment) && !(dxfer_len & alignment)) { - res = __sg_start_req(srp, hp, cmd); - if (!res) - res = sg_build_direct(srp, sfp, dxfer_len); - - return res; - } + !(uaddr & alignment) && !(dxfer_len & alignment)) + return sg_build_direct(srp, sfp, dxfer_len); #endif if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) sg_link_reserve(sfp, srp, dxfer_len); - else { + else res = sg_build_indirect(req_schp, sfp, dxfer_len); - if (res) { - sg_remove_scat(req_schp); - return res; - } + + if (!res) { + struct request *rq = srp->rq; + Sg_scatter_hold *schp = &srp->data; + int iovec_count = (int) hp->iovec_count; + + map_data.pages = schp->pages; + map_data.page_order = schp->page_order; + map_data.nr_entries = schp->k_use_sg; + + if (iovec_count) + res = blk_rq_map_user_iov(q, rq, &map_data, hp->dxferp, + iovec_count, + hp->dxfer_len, GFP_ATOMIC); + else + res = blk_rq_map_user(q, rq, &map_data, hp->dxferp, 
+ hp->dxfer_len, GFP_ATOMIC); + + if (!res) + srp->bio = rq->bio; } - return 0; + + return res; } static void @@ -1730,6 +1723,7 @@ sg_finish_rem_req(Sg_request * srp) if (srp->rq) { if (srp->bio) blk_rq_unmap_user(srp->bio); + blk_put_request(srp->rq); } @@ -1739,21 +1733,12 @@ sg_finish_rem_req(Sg_request * srp) static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) { - int sg_bufflen = tablesize * sizeof(struct scatterlist); + int sg_bufflen = tablesize * sizeof(struct page *); gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; - /* - * TODO: test without low_dma, we should not need it since - * the block layer will bounce the buffer for us - * - * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list. - */ - if (sfp->low_dma) - gfp_flags |= GFP_DMA; - schp->buffer = kzalloc(sg_bufflen, gfp_flags); - if (!schp->buffer) + schp->pages = kzalloc(sg_bufflen, gfp_flags); + if (!schp->pages) return -ENOMEM; - sg_init_table(schp->buffer, tablesize); schp->sglist_len = sg_bufflen; return tablesize; /* number of scat_gath elements allocated */ } @@ -1780,11 +1765,10 @@ sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { - struct scatterlist *sg; - int ret_sz = 0, k, rem_sz, num, mx_sc_elems; + int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems; int sg_tablesize = sfp->parentdp->sg_tablesize; - int blk_size = buff_size; - struct page *p = NULL; + int blk_size = buff_size, order; + gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; if (blk_size < 0) return -EFAULT; @@ -1808,15 +1792,26 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) } else scatter_elem_sz_prev = num; } - for (k = 0, sg = schp->buffer, rem_sz = blk_size; - (rem_sz > 0) && (k < mx_sc_elems); - ++k, rem_sz -= ret_sz, sg = sg_next(sg)) { - + + if (sfp->low_dma) + gfp_mask |= GFP_DMA; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + gfp_mask |= 
__GFP_ZERO; + + order = get_order(num); +retry: + ret_sz = 1 << (PAGE_SHIFT + order); + + for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems; + k++, rem_sz -= ret_sz) { + num = (rem_sz > scatter_elem_sz_prev) ? - scatter_elem_sz_prev : rem_sz; - p = sg_page_malloc(num, sfp->low_dma, &ret_sz); - if (!p) - return -ENOMEM; + scatter_elem_sz_prev : rem_sz; + + schp->pages[k] = alloc_pages(gfp_mask, order); + if (!schp->pages[k]) + goto out; if (num == scatter_elem_sz_prev) { if (unlikely(ret_sz > scatter_elem_sz_prev)) { @@ -1824,12 +1819,12 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = ret_sz; } } - sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0); SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " "ret_sz=%d\n", k, num, ret_sz)); } /* end of for loop */ + schp->page_order = order; schp->k_use_sg = k; SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, " "rem_sz=%d\n", k, rem_sz)); @@ -1837,8 +1832,15 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) schp->bufflen = blk_size; if (rem_sz > 0) /* must have failed */ return -ENOMEM; - return 0; +out: + for (i = 0; i < k; i++) + __free_pages(schp->pages[k], order); + + if (--order >= 0) + goto retry; + + return -ENOMEM; } static int @@ -1846,13 +1848,8 @@ sg_write_xfer(Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int num_xfer = 0; - int j, k, onum, usglen, ksglen, res; - int iovec_count = (int) hp->iovec_count; int dxfer_dir = hp->dxfer_direction; - unsigned char *p; - unsigned char __user *up; int new_interface = ('\0' == hp->interface_id) ? 
0 : 1; if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) || @@ -1866,83 +1863,9 @@ sg_write_xfer(Sg_request * srp) && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) return 0; - SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, iovec_count, schp->k_use_sg)); - if (iovec_count) { - onum = iovec_count; - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) - return -EFAULT; - } else - onum = 1; - - ksglen = sg->length; - p = page_address(sg_page(sg)); - for (j = 0, k = 0; j < onum; ++j) { - res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up); - if (res) - return res; - - for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg_page(sg))) { - if (usglen <= 0) - break; - if (ksglen > usglen) { - if (usglen >= num_xfer) { - if (__copy_from_user(p, up, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_from_user(p, up, usglen)) - return -EFAULT; - p += usglen; - ksglen -= usglen; - break; - } else { - if (ksglen >= num_xfer) { - if (__copy_from_user(p, up, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_from_user(p, up, ksglen)) - return -EFAULT; - up += ksglen; - usglen -= ksglen; - } - ++k; - if (k >= schp->k_use_sg) - return 0; - } - } - - return 0; -} + SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, k_use_sg=%d\n", + num_xfer, schp->k_use_sg)); -static int -sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, - int wr_xf, int *countp, unsigned char __user **up) -{ - int num_xfer = (int) hp->dxfer_len; - unsigned char __user *p = hp->dxferp; - int count; - - if (0 == sg_num) { - if (wr_xf && ('\0' == hp->interface_id)) - count = (int) hp->flags; /* holds "old" input_size */ - else - count = num_xfer; - } else { - sg_iovec_t iovec; - if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC)) - return -EFAULT; - p = iovec.iov_base; - count = (int) iovec.iov_len; - } - if (!access_ok(wr_xf ? 
VERIFY_READ : VERIFY_WRITE, p, count)) - return -EFAULT; - if (up) - *up = p; - if (countp) - *countp = count; return 0; } @@ -1950,21 +1873,18 @@ static void sg_remove_scat(Sg_scatter_hold * schp) { SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); - if (schp->buffer && (schp->sglist_len > 0)) { - struct scatterlist *sg = schp->buffer; - + if (schp->pages && schp->sglist_len > 0) { if (!schp->dio_in_use) { int k; - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); - ++k, sg = sg_next(sg)) { + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { SCSI_LOG_TIMEOUT(5, printk( - "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", - k, sg_page(sg), sg->length)); - sg_page_free(sg_page(sg), sg->length); + "sg_remove_scat: k=%d, pg=0x%p\n", + k, schp->pages[k])); + __free_pages(schp->pages[k], schp->page_order); } - kfree(schp->buffer); + kfree(schp->pages); } } memset(schp, 0, sizeof (*schp)); @@ -1975,13 +1895,8 @@ sg_read_xfer(Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int num_xfer = 0; - int j, k, onum, usglen, ksglen, res; - int iovec_count = (int) hp->iovec_count; int dxfer_dir = hp->dxfer_direction; - unsigned char *p; - unsigned char __user *up; int new_interface = ('\0' == hp->interface_id) ? 
0 : 1; if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir) @@ -1996,53 +1911,7 @@ sg_read_xfer(Sg_request * srp) return 0; SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, iovec_count, schp->k_use_sg)); - if (iovec_count) { - onum = iovec_count; - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) - return -EFAULT; - } else - onum = 1; - - p = page_address(sg_page(sg)); - ksglen = sg->length; - for (j = 0, k = 0; j < onum; ++j) { - res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up); - if (res) - return res; - - for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg_page(sg))) { - if (usglen <= 0) - break; - if (ksglen > usglen) { - if (usglen >= num_xfer) { - if (__copy_to_user(up, p, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_to_user(up, p, usglen)) - return -EFAULT; - p += usglen; - ksglen -= usglen; - break; - } else { - if (ksglen >= num_xfer) { - if (__copy_to_user(up, p, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_to_user(up, p, ksglen)) - return -EFAULT; - up += ksglen; - usglen -= ksglen; - } - ++k; - if (k >= schp->k_use_sg) - return 0; - } - } - + num_xfer, (int)hp->iovec_count, schp->k_use_sg)); return 0; } @@ -2050,7 +1919,6 @@ static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int k, num; SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n", @@ -2058,15 +1926,18 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) if ((!outp) || (num_read_xfer <= 0)) return 0; - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { - num = sg->length; + blk_rq_unmap_user(srp->bio); + srp->bio = NULL; + + num = 1 << (PAGE_SHIFT + schp->page_order); + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { - if (__copy_to_user(outp, page_address(sg_page(sg)), + 
if (__copy_to_user(outp, page_address(schp->pages[k]), num_read_xfer)) return -EFAULT; break; } else { - if (__copy_to_user(outp, page_address(sg_page(sg)), + if (__copy_to_user(outp, page_address(schp->pages[k]), num)) return -EFAULT; num_read_xfer -= num; @@ -2101,24 +1972,22 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) { Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; - struct scatterlist *sg = rsv_schp->buffer; int k, num, rem; srp->res_used = 1; SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size)); rem = size; - for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) { - num = sg->length; + num = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg; k++) { if (rem <= num) { - sfp->save_scat_len = num; - sg->length = rem; req_schp->k_use_sg = k + 1; req_schp->sglist_len = rsv_schp->sglist_len; - req_schp->buffer = rsv_schp->buffer; + req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; req_schp->b_malloc_len = rsv_schp->b_malloc_len; + req_schp->page_order = rsv_schp->page_order; break; } else rem -= num; @@ -2132,22 +2001,13 @@ static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) { Sg_scatter_hold *req_schp = &srp->data; - Sg_scatter_hold *rsv_schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n", (int) req_schp->k_use_sg)); - if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) { - struct scatterlist *sg = rsv_schp->buffer; - - if (sfp->save_scat_len > 0) - (sg + (req_schp->k_use_sg - 1))->length = - (unsigned) sfp->save_scat_len; - else - SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n")); - } req_schp->k_use_sg = 0; req_schp->bufflen = 0; - req_schp->buffer = NULL; + req_schp->pages = NULL; + req_schp->page_order = 0; req_schp->sglist_len = 0; sfp->save_scat_len = 0; srp->res_used = 0; @@ -2405,53 +2265,6 @@ sg_res_in_use(Sg_fd * sfp) return srp ? 
1 : 0; } -/* The size fetched (value output via retSzp) set when non-NULL return */ -static struct page * -sg_page_malloc(int rqSz, int lowDma, int *retSzp) -{ - struct page *resp = NULL; - gfp_t page_mask; - int order, a_size; - int resSz; - - if ((rqSz <= 0) || (NULL == retSzp)) - return resp; - - if (lowDma) - page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN; - else - page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; - - for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; - order++, a_size <<= 1) ; - resSz = a_size; /* rounded up if necessary */ - resp = alloc_pages(page_mask, order); - while ((!resp) && order) { - --order; - a_size >>= 1; /* divide by 2, until PAGE_SIZE */ - resp = alloc_pages(page_mask, order); /* try half */ - resSz = a_size; - } - if (resp) { - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) - memset(page_address(resp), 0, resSz); - *retSzp = resSz; - } - return resp; -} - -static void -sg_page_free(struct page *page, int size) -{ - int order, a_size; - - if (!page) - return; - for (order = 0, a_size = PAGE_SIZE; a_size < size; - order++, a_size <<= 1) ; - __free_pages(page, order); -} - #ifdef CONFIG_SCSI_PROC_FS static int sg_idr_max_id(int id, void *p, void *data) -- cgit v1.2.3 From 01cfcddd98f09e05a2e36031654ed46643b76f23 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 28 Aug 2008 15:05:59 +0900 Subject: sg: use blk_rq_aligned helper function Signed-off-by: FUJITA Tomonori Cc: Douglas Gilbert Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d6391666502..ed69292babd 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1656,11 +1656,9 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; - unsigned long uaddr = (unsigned long)hp->dxferp; 
Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; - unsigned long alignment = queue_dma_alignment(q) | q->dma_pad_mask; struct rq_map_data map_data; SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); @@ -1676,7 +1674,7 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma) && - !(uaddr & alignment) && !(dxfer_len & alignment)) + blk_rq_aligned(q, hp->dxferp, dxfer_len)) return sg_build_direct(srp, sfp, dxfer_len); #endif if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) -- cgit v1.2.3 From fad7f01e61bf737fe8a3740d803f000db57ecac6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 16:20:20 +0900 Subject: sg: set dxferp to NULL for READ with the older SG interface With the older SG interface, we don't know a user-space address to transfer data when executing a SCSI command. So we can't pass a user-space address to blk_rq_map_user. This patch fixes sg to pass a NULL user-space address to blk_rq_map_user so that it just sets up a request and bios with page frames properly without data transfer. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ed69292babd..50c07bca727 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -615,7 +615,10 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) else hp->dxfer_direction = (mxsize > 0) ? 
SG_DXFER_FROM_DEV : SG_DXFER_NONE; hp->dxfer_len = mxsize; - hp->dxferp = (char __user *)buf + cmd_size; + if (hp->dxfer_direction == SG_DXFER_TO_DEV) + hp->dxferp = (char __user *)buf + cmd_size; + else + hp->dxferp = NULL; hp->sbp = NULL; hp->timeout = old_hdr.reply_len; /* structure abuse ... */ hp->flags = input_size; /* structure abuse ... */ -- cgit v1.2.3 From f98a8cae12f2b2a8f9bfd7a53c990a1a405e880e Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Thu, 4 Sep 2008 14:27:35 -0600 Subject: SCSI sd driver calls revalidate_disk wrapper. Modify the SCSI disk driver to call the revalidate_disk() wrapper. This allows us to do some housekeeping such as accounting for a disk being resized online. The wrapper will call sd_revalidate_disk() at the appropriate time. Signed-off-by: Andrew Patterson Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bcb04b2a767..cb115d1bf22 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -165,7 +165,7 @@ sd_store_cache_type(struct device *dev, struct device_attribute *attr, sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } - sd_revalidate_disk(sdkp->disk); + revalidate_disk(sdkp->disk); return count; } @@ -916,7 +916,7 @@ static void sd_rescan(struct device *dev) struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); if (sdkp) { - sd_revalidate_disk(sdkp->disk); + revalidate_disk(sdkp->disk); scsi_disk_put(sdkp); } } -- cgit v1.2.3 From 242f9dcb8ba6f68fcd217a119a7648a4f69290e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:55:09 -0700 Subject: block: unify request timeout handling Right now SCSI and others do their own command timeout handling. Move those bits to the block layer. Instead of having a timer per command, we try to be a bit more clever and simply have one per-queue. 
This avoids the overhead of having to tear down and setup a timer for each command, so it will result in a lot less timer fiddling. Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- drivers/ata/libata-eh.c | 13 ++--- drivers/ata/libata.h | 2 +- drivers/scsi/aacraid/aachba.c | 2 +- drivers/scsi/gdth.c | 60 ++++++++++++++--------- drivers/scsi/gdth.h | 2 +- drivers/scsi/gdth_proc.c | 66 -------------------------- drivers/scsi/gdth_proc.h | 3 -- drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +- drivers/scsi/ide-scsi.c | 2 +- drivers/scsi/ipr.c | 3 +- drivers/scsi/ips.c | 2 +- drivers/scsi/libiscsi.c | 17 +++---- drivers/scsi/libsas/sas_ata.c | 2 +- drivers/scsi/libsas/sas_internal.h | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 30 ++++++------ drivers/scsi/megaraid/megaraid_sas.c | 6 +-- drivers/scsi/ncr53c8xx.c | 4 +- drivers/scsi/qla1280.c | 4 +- drivers/scsi/qla4xxx/ql4_os.c | 4 +- drivers/scsi/scsi.c | 92 +++++++----------------------------- drivers/scsi/scsi_error.c | 90 +++++------------------------------ drivers/scsi/scsi_lib.c | 17 +++++-- drivers/scsi/scsi_priv.h | 7 +-- drivers/scsi/scsi_sysfs.c | 7 ++- drivers/scsi/scsi_transport_fc.c | 6 +-- drivers/scsi/sd.c | 9 ++-- drivers/scsi/sr.c | 5 +- drivers/scsi/sym53c8xx_2/sym_glue.c | 4 +- 28 files changed, 150 insertions(+), 313 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c1db2f234d2..bd0b2bc76f1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include @@ -457,29 +458,29 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, * RETURNS: * EH_HANDLED or EH_NOT_HANDLED */ -enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; struct ata_port *ap = ata_shost_to_port(host); unsigned long flags; struct ata_queued_cmd *qc; - enum 
scsi_eh_timer_return ret; + enum blk_eh_timer_return ret; DPRINTK("ENTER\n"); if (ap->ops->error_handler) { - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; goto out; } - ret = EH_HANDLED; + ret = BLK_EH_HANDLED; spin_lock_irqsave(ap->lock, flags); qc = ata_qc_from_tag(ap, ap->link.active_tag); if (qc) { WARN_ON(qc->scsicmd != cmd); qc->flags |= ATA_QCFLAG_EH_SCHEDULED; qc->err_mask |= AC_ERR_TIMEOUT; - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; } spin_unlock_irqrestore(ap->lock, flags); @@ -831,7 +832,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * Note that ATA_QCFLAG_FAILED is unconditionally set after * this function completes. */ - scsi_req_abort_cmd(qc->scsicmd); + blk_abort_request(qc->scsicmd->request); } /** diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index ade5c75b614..24f5005478b 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -152,7 +152,7 @@ extern int ata_bus_probe(struct ata_port *ap); /* libata-eh.c */ extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); -extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); +extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); extern void ata_scsi_error(struct Scsi_Host *host); extern void ata_port_wait_eh(struct ata_port *ap); extern void ata_eh_fastdrain_timerfn(unsigned long arg); diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index aa4e77c2527..8abfd06b5a7 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1139,7 +1139,7 @@ static struct aac_srb * aac_scsi_common(struct fib * fib, struct scsi_cmnd * cmd srbcmd->id = cpu_to_le32(scmd_id(cmd)); srbcmd->lun = cpu_to_le32(cmd->device->lun); srbcmd->flags = cpu_to_le32(flag); - timeout = cmd->timeout_per_command/HZ; + timeout = cmd->request->timeout/HZ; if (timeout == 0) timeout = 1; srbcmd->timeout = 
cpu_to_le32(timeout); // timeout in seconds diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 822d5214692..c387c15a212 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -464,7 +464,6 @@ int __gdth_execute(struct scsi_device *sdev, gdth_cmd_str *gdtcmd, char *cmnd, /* use request field to save the ptr. to completion struct. */ scp->request = (struct request *)&wait; - scp->timeout_per_command = timeout*HZ; scp->cmd_len = 12; scp->cmnd = cmnd; cmndinfo.priority = IOCTL_PRI; @@ -1995,23 +1994,12 @@ static void gdth_putq(gdth_ha_str *ha, Scsi_Cmnd *scp, unchar priority) register Scsi_Cmnd *pscp; register Scsi_Cmnd *nscp; ulong flags; - unchar b, t; TRACE(("gdth_putq() priority %d\n",priority)); spin_lock_irqsave(&ha->smp_lock, flags); - if (!cmndinfo->internal_command) { + if (!cmndinfo->internal_command) cmndinfo->priority = priority; - b = scp->device->channel; - t = scp->device->id; - if (priority >= DEFAULT_PRI) { - if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) || - (b==ha->virt_bus && thdr[t].lock)) { - TRACE2(("gdth_putq(): locked IO ->update_timeout()\n")); - cmndinfo->timeout = gdth_update_timeout(scp, 0); - } - } - } if (ha->req_first==NULL) { ha->req_first = scp; /* queue was empty */ @@ -3899,6 +3887,39 @@ static const char *gdth_info(struct Scsi_Host *shp) return ((const char *)ha->binfo.type_string); } +static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp) +{ + gdth_ha_str *ha = shost_priv(scp->device->host); + struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); + unchar b, t; + ulong flags; + enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED; + + TRACE(("%s() cmd 0x%x\n", scp->cmnd[0], __func__)); + b = scp->device->channel; + t = scp->device->id; + + /* + * We don't really honor the command timeout, but we try to + * honor 6 times of the actual command timeout! So reset the + * timer if this is less than 6th timeout on this command! 
+ */ + if (++cmndinfo->timeout_count < 6) + retval = BLK_EH_RESET_TIMER; + + /* Reset the timeout if it is locked IO */ + spin_lock_irqsave(&ha->smp_lock, flags); + if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) || + (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) { + TRACE2(("%s(): locked IO, reset timeout\n", __func__)); + retval = BLK_EH_RESET_TIMER; + } + spin_unlock_irqrestore(&ha->smp_lock, flags); + + return retval; +} + + static int gdth_eh_bus_reset(Scsi_Cmnd *scp) { gdth_ha_str *ha = shost_priv(scp->device->host); @@ -3992,7 +4013,7 @@ static int gdth_queuecommand(struct scsi_cmnd *scp, BUG_ON(!cmndinfo); scp->scsi_done = done; - gdth_update_timeout(scp, scp->timeout_per_command * 6); + cmndinfo->timeout_count = 0; cmndinfo->priority = DEFAULT_PRI; return __gdth_queuecommand(ha, scp, cmndinfo); @@ -4096,12 +4117,10 @@ static int ioc_lockdrv(void __user *arg) ha->hdr[j].lock = 1; spin_unlock_irqrestore(&ha->smp_lock, flags); gdth_wait_completion(ha, ha->bus_cnt, j); - gdth_stop_timeout(ha, ha->bus_cnt, j); } else { spin_lock_irqsave(&ha->smp_lock, flags); ha->hdr[j].lock = 0; spin_unlock_irqrestore(&ha->smp_lock, flags); - gdth_start_timeout(ha, ha->bus_cnt, j); gdth_next(ha); } } @@ -4539,18 +4558,14 @@ static int gdth_ioctl(struct inode *inode, struct file *filep, spin_lock_irqsave(&ha->smp_lock, flags); ha->raw[i].lock = 1; spin_unlock_irqrestore(&ha->smp_lock, flags); - for (j = 0; j < ha->tid_cnt; ++j) { + for (j = 0; j < ha->tid_cnt; ++j) gdth_wait_completion(ha, i, j); - gdth_stop_timeout(ha, i, j); - } } else { spin_lock_irqsave(&ha->smp_lock, flags); ha->raw[i].lock = 0; spin_unlock_irqrestore(&ha->smp_lock, flags); - for (j = 0; j < ha->tid_cnt; ++j) { - gdth_start_timeout(ha, i, j); + for (j = 0; j < ha->tid_cnt; ++j) gdth_next(ha); - } } } break; @@ -4644,6 +4659,7 @@ static struct scsi_host_template gdth_template = { .slave_configure = gdth_slave_configure, .bios_param = gdth_bios_param, .proc_info = gdth_proc_info, + 
.eh_timed_out = gdth_timed_out, .proc_name = "gdth", .can_queue = GDTH_MAXCMDS, .this_id = -1, diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h index ca92476727c..1646444e9bd 100644 --- a/drivers/scsi/gdth.h +++ b/drivers/scsi/gdth.h @@ -916,7 +916,7 @@ typedef struct { gdth_cmd_str *internal_cmd_str; /* crier for internal messages*/ dma_addr_t sense_paddr; /* sense dma-addr */ unchar priority; - int timeout; + int timeout_count; /* # of timeout calls */ volatile int wait_for_completion; ushort status; ulong32 info; diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c index ce0228e26ae..59349a316e1 100644 --- a/drivers/scsi/gdth_proc.c +++ b/drivers/scsi/gdth_proc.c @@ -748,69 +748,3 @@ static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id) } spin_unlock_irqrestore(&ha->smp_lock, flags); } - -static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id) -{ - ulong flags; - Scsi_Cmnd *scp; - unchar b, t; - - spin_lock_irqsave(&ha->smp_lock, flags); - - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); - if (!cmndinfo->internal_command) { - b = scp->device->channel; - t = scp->device->id; - if (t == (unchar)id && b == (unchar)busnum) { - TRACE2(("gdth_stop_timeout(): update_timeout()\n")); - cmndinfo->timeout = gdth_update_timeout(scp, 0); - } - } - } - spin_unlock_irqrestore(&ha->smp_lock, flags); -} - -static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id) -{ - ulong flags; - Scsi_Cmnd *scp; - unchar b, t; - - spin_lock_irqsave(&ha->smp_lock, flags); - - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); - if (!cmndinfo->internal_command) { - b = scp->device->channel; - t = scp->device->id; - if (t == (unchar)id && b == (unchar)busnum) { - TRACE2(("gdth_start_timeout(): update_timeout()\n")); - gdth_update_timeout(scp, cmndinfo->timeout); - } - } - } - 
spin_unlock_irqrestore(&ha->smp_lock, flags); -} - -static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout) -{ - int oldto; - - oldto = scp->timeout_per_command; - scp->timeout_per_command = timeout; - - if (timeout == 0) { - del_timer(&scp->eh_timeout); - scp->eh_timeout.data = (unsigned long) NULL; - scp->eh_timeout.expires = 0; - } else { - if (scp->eh_timeout.data != (unsigned long) NULL) - del_timer(&scp->eh_timeout); - scp->eh_timeout.data = (unsigned long) scp; - scp->eh_timeout.expires = jiffies + timeout; - add_timer(&scp->eh_timeout); - } - - return oldto; -} diff --git a/drivers/scsi/gdth_proc.h b/drivers/scsi/gdth_proc.h index 45e6fdacf36..9b900cc9ebe 100644 --- a/drivers/scsi/gdth_proc.h +++ b/drivers/scsi/gdth_proc.h @@ -20,9 +20,6 @@ static char *gdth_ioctl_alloc(gdth_ha_str *ha, int size, int scratch, ulong64 *paddr); static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr); static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id); -static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id); -static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id); -static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout); #endif diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 7b1502c0ab6..87e09f35d3d 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -756,7 +756,7 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd, init_event_struct(evt_struct, handle_cmd_rsp, VIOSRP_SRP_FORMAT, - cmnd->timeout_per_command/HZ); + cmnd->request->timeout/HZ); evt_struct->cmnd = cmnd; evt_struct->cmnd_done = done; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 461331d3dc4..81c16cba541 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -612,7 +612,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd, pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); pc->scsi_cmd = cmd; pc->done = done; - pc->timeout = 
jiffies + cmd->timeout_per_command; + pc->timeout = jiffies + cmd->request->timeout; if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index e7a3a655442..d30eb7ba018 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3670,7 +3670,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) sdev->no_uld_attach = 1; } if (ipr_is_vset_device(res)) { - sdev->timeout = IPR_VSET_RW_TIMEOUT; + blk_queue_rq_timeout(sdev->request_queue, + IPR_VSET_RW_TIMEOUT); blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS); } if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res)) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index bc9e6ddf41d..ef683f0d2b5 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -3818,7 +3818,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb) scb->cmd.dcdb.segment_4G = 0; scb->cmd.dcdb.enhanced_sg = 0; - TimeOut = scb->scsi_cmd->timeout_per_command; + TimeOut = scb->scsi_cmd->request->timeout; if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ if (!scb->sg_len) { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 299e075a7b3..1eca82420aa 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1476,12 +1476,12 @@ static void iscsi_start_tx(struct iscsi_conn *conn) scsi_queue_work(conn->session->host, &conn->xmitwork); } -static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) +static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct iscsi_conn *conn; - enum scsi_eh_timer_return rc = EH_NOT_HANDLED; + enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; cls_session = starget_to_session(scsi_target(scmd->device)); session = cls_session->dd_data; @@ -1494,14 +1494,14 @@ static enum scsi_eh_timer_return 
iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) * We are probably in the middle of iscsi recovery so let * that complete and handle the error. */ - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; goto done; } conn = session->leadconn; if (!conn) { /* In the middle of shuting down */ - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; goto done; } @@ -1513,20 +1513,21 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) */ if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; /* * if we are about to check the transport then give the command * more time */ if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ), jiffies)) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; /* if in the middle of checking the transport then give us more time */ if (conn->ping_task) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; done: spin_unlock(&session->lock); - debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh"); + debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ? 
+ "timer reset" : "nh"); return rc; } diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 48ee8c7f5bd..837b095ba90 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -398,7 +398,7 @@ void sas_ata_task_abort(struct sas_task *task) /* Bounce SCSI-initiated commands to the SCSI EH */ if (qc->scsicmd) { - scsi_req_abort_cmd(qc->scsicmd); + blk_abort_request(qc->scsicmd->request); scsi_schedule_eh(qc->scsicmd->device->host); return; } diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index b4f9368f116..0001374bd6b 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -55,7 +55,7 @@ void sas_unregister_phys(struct sas_ha_struct *sas_ha); int sas_register_ports(struct sas_ha_struct *sas_ha); void sas_unregister_ports(struct sas_ha_struct *sas_ha); -enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); +enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); int sas_init_queue(struct sas_ha_struct *sas_ha); int sas_init_events(struct sas_ha_struct *sas_ha); diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index a8e3ef30907..744838780ad 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -673,43 +673,43 @@ out: return; } -enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) { struct sas_task *task = TO_SAS_TASK(cmd); unsigned long flags; if (!task) { - cmd->timeout_per_command /= 2; + cmd->request->timeout /= 2; SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n", - cmd, task, (cmd->timeout_per_command ? - "EH_RESET_TIMER" : "EH_NOT_HANDLED")); - if (!cmd->timeout_per_command) - return EH_NOT_HANDLED; - return EH_RESET_TIMER; + cmd, task, (cmd->request->timeout ? 
+ "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED")); + if (!cmd->request->timeout) + return BLK_EH_NOT_HANDLED; + return BLK_EH_RESET_TIMER; } spin_lock_irqsave(&task->task_state_lock, flags); BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED); if (task->task_state_flags & SAS_TASK_STATE_DONE) { spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", - cmd, task); - return EH_HANDLED; + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: " + "BLK_EH_HANDLED\n", cmd, task); + return BLK_EH_HANDLED; } if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) { spin_unlock_irqrestore(&task->task_state_lock, flags); SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: " - "EH_RESET_TIMER\n", + "BLK_EH_RESET_TIMER\n", cmd, task); - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; } task->task_state_flags |= SAS_TASK_STATE_ABORTED; spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n", + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n", cmd, task); - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) @@ -1039,7 +1039,7 @@ void sas_task_abort(struct sas_task *task) return; } - scsi_req_abort_cmd(sc); + blk_abort_request(sc->request); scsi_schedule_eh(sc->device->host); } diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index 97b763378e7..afe1de99876 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -1167,7 +1167,7 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) * cmd has not been completed within the timeout period. 
*/ static enum -scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) { struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr; struct megasas_instance *instance; @@ -1175,7 +1175,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) if (time_after(jiffies, scmd->jiffies_at_alloc + (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) { - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } instance = cmd->instance; @@ -1189,7 +1189,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) spin_unlock_irqrestore(instance->host->host_lock, flags); } - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; } /** diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index c57c94c0ffd..3b7240e4081 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -4170,8 +4170,8 @@ static int ncr_queue_command (struct ncb *np, struct scsi_cmnd *cmd) ** **---------------------------------------------------- */ - if (np->settle_time && cmd->timeout_per_command >= HZ) { - u_long tlimit = jiffies + cmd->timeout_per_command - HZ; + if (np->settle_time && cmd->request->timeout >= HZ) { + u_long tlimit = jiffies + cmd->request->timeout - HZ; if (time_after(np->settle_time, tlimit)) np->settle_time = tlimit; } diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 37f9ba0cd79..b6cd12b2e99 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2845,7 +2845,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); /* Set ISP command timeout. 
*/ - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); /* Set device target ID and LUN */ pkt->lun = SCSI_LUN_32(cmd); @@ -3114,7 +3114,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); /* Set ISP command timeout. */ - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); /* Set device target ID and LUN */ pkt->lun = SCSI_LUN_32(cmd); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 88bebb13bc5..de8279ad7d8 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1542,7 +1542,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) DEBUG2(printk(KERN_INFO "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x," "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no, - cmd, jiffies, cmd->timeout_per_command / HZ, + cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); /* FIXME: wait for hba to go online */ @@ -1598,7 +1598,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) DEBUG2(printk(KERN_INFO "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, " "to=%x,dpc_flags=%lx, status=%x allowed=%d\n", - ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ, + ha->host_no, cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); stat = qla4xxx_reset_target(ha, ddb_entry); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ee6be596503..dbeb86cafc0 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -291,7 +291,6 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) unsigned long flags; cmd->device = dev; - init_timer(&cmd->eh_timeout); INIT_LIST_HEAD(&cmd->list); spin_lock_irqsave(&dev->list_lock, flags); list_add_tail(&cmd->list, &dev->cmd_list); @@ -652,14 +651,19 @@ int scsi_dispatch_cmd(struct 
scsi_cmnd *cmd) unsigned long timeout; int rtn = 0; + /* + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. + */ + atomic_inc(&cmd->device->iorequest_cnt); + /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT * returns an immediate error upwards, and signals * that the device is no longer present */ cmd->result = DID_NO_CONNECT << 16; - atomic_inc(&cmd->device->iorequest_cnt); - __scsi_done(cmd); + scsi_done(cmd); /* return 0 (because the command has been processed) */ goto out; } @@ -672,6 +676,7 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) * future requests should not occur until the device * transitions out of the suspend state. */ + scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n")); @@ -714,20 +719,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) host->resetting = 0; } - /* - * AK: unlikely race here: for some reason the timer could - * expire before the serial number is set up below. - */ - scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out); - scsi_log_send(cmd); - /* - * We will use a queued command if possible, otherwise we will - * emulate the queuing and calling of completion function ourselves. - */ - atomic_inc(&cmd->device->iorequest_cnt); - /* * Before we queue this command, check if the command * length exceeds what the host adapter can handle. @@ -744,6 +737,12 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } spin_lock_irqsave(host->host_lock, flags); + /* + * AK: unlikely race here: for some reason the timer could + * expire before the serial number is set up below. 
+ * + * TODO: kill serial or move to blk layer + */ scsi_cmd_get_serial(host, cmd); if (unlikely(host->shost_state == SHOST_DEL)) { @@ -754,12 +753,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } spin_unlock_irqrestore(host->host_lock, flags); if (rtn) { - if (scsi_delete_timer(cmd)) { - atomic_inc(&cmd->device->iodone_cnt); - scsi_queue_insert(cmd, - (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? - rtn : SCSI_MLQUEUE_HOST_BUSY); - } + scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? + rtn : SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } @@ -769,24 +764,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return rtn; } -/** - * scsi_req_abort_cmd -- Request command recovery for the specified command - * @cmd: pointer to the SCSI command of interest - * - * This function requests that SCSI Core start recovery for the - * command by deleting the timer and adding the command to the eh - * queue. It can be called by either LLDDs or SCSI Core. LLDDs who - * implement their own error recovery MAY ignore the timeout event if - * they generated scsi_req_abort_cmd. - */ -void scsi_req_abort_cmd(struct scsi_cmnd *cmd) -{ - if (!scsi_delete_timer(cmd)) - return; - scsi_times_out(cmd); -} -EXPORT_SYMBOL(scsi_req_abort_cmd); - /** * scsi_done - Enqueue the finished SCSI command into the done queue. * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives @@ -802,42 +779,7 @@ EXPORT_SYMBOL(scsi_req_abort_cmd); */ static void scsi_done(struct scsi_cmnd *cmd) { - /* - * We don't have to worry about this one timing out anymore. - * If we are unable to remove the timer, then the command - * has already timed out. In which case, we have no choice but to - * let the timeout function run, as we have no idea where in fact - * that function could really be. It might be on another processor, - * etc, etc. 
- */ - if (!scsi_delete_timer(cmd)) - return; - __scsi_done(cmd); -} - -/* Private entry to scsi_done() to complete a command when the timer - * isn't running --- used by scsi_times_out */ -void __scsi_done(struct scsi_cmnd *cmd) -{ - struct request *rq = cmd->request; - - /* - * Set the serial numbers back to zero - */ - cmd->serial_number = 0; - - atomic_inc(&cmd->device->iodone_cnt); - if (cmd->result) - atomic_inc(&cmd->device->ioerr_cnt); - - BUG_ON(!rq); - - /* - * The uptodate/nbytes values don't matter, as we allow partial - * completes and thus will check this in the softirq callback - */ - rq->completion_data = cmd; - blk_complete_request(rq); + blk_complete_request(cmd->request); } /* Move this to a header if it becomes more generally useful */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 39ce3aba1da..fecefa05cb6 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -111,70 +111,9 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) return ret; } -/** - * scsi_add_timer - Start timeout timer for a single scsi command. - * @scmd: scsi command that is about to start running. - * @timeout: amount of time to allow this command to run. - * @complete: timeout function to call if timer isn't canceled. - * - * Notes: - * This should be turned into an inline function. Each scsi command - * has its own timer, and as it is added to the queue, we set up the - * timer. When the command completes, we cancel the timer. - */ -void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, - void (*complete)(struct scsi_cmnd *)) -{ - - /* - * If the clock was already running for this command, then - * first delete the timer. The timer handling code gets rather - * confused if we don't do this. 
- */ - if (scmd->eh_timeout.function) - del_timer(&scmd->eh_timeout); - - scmd->eh_timeout.data = (unsigned long)scmd; - scmd->eh_timeout.expires = jiffies + timeout; - scmd->eh_timeout.function = (void (*)(unsigned long)) complete; - - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" - " %d, (%p)\n", __func__, - scmd, timeout, complete)); - - add_timer(&scmd->eh_timeout); -} - -/** - * scsi_delete_timer - Delete/cancel timer for a given function. - * @scmd: Cmd that we are canceling timer for - * - * Notes: - * This should be turned into an inline function. - * - * Return value: - * 1 if we were able to detach the timer. 0 if we blew it, and the - * timer function has already started to run. - */ -int scsi_delete_timer(struct scsi_cmnd *scmd) -{ - int rtn; - - rtn = del_timer(&scmd->eh_timeout); - - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," - " rtn: %d\n", __func__, - scmd, rtn)); - - scmd->eh_timeout.data = (unsigned long)NULL; - scmd->eh_timeout.function = NULL; - - return rtn; -} - /** * scsi_times_out - Timeout function for normal scsi commands. - * @scmd: Cmd that is timing out. + * @req: request that is timing out. * * Notes: * We do not need to lock this. There is the potential for a race @@ -182,9 +121,11 @@ int scsi_delete_timer(struct scsi_cmnd *scmd) * normal completion function determines that the timer has already * fired, then it mustn't do anything. 
*/ -void scsi_times_out(struct scsi_cmnd *scmd) +enum blk_eh_timer_return scsi_times_out(struct request *req) { - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + struct scsi_cmnd *scmd = req->special; + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; scsi_log_completion(scmd, TIMEOUT_ERROR); @@ -196,22 +137,20 @@ void scsi_times_out(struct scsi_cmnd *scmd) eh_timed_out = NULL; if (eh_timed_out) - switch (eh_timed_out(scmd)) { - case EH_HANDLED: - __scsi_done(scmd); - return; - case EH_RESET_TIMER: - scsi_add_timer(scmd, scmd->timeout_per_command, - scsi_times_out); - return; - case EH_NOT_HANDLED: + rtn = eh_timed_out(scmd); + switch (rtn) { + case BLK_EH_NOT_HANDLED: break; + default: + return rtn; } if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { scmd->result |= DID_TIME_OUT << 16; - __scsi_done(scmd); + return BLK_EH_HANDLED; } + + return BLK_EH_NOT_HANDLED; } /** @@ -1793,7 +1732,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) blk_rq_init(NULL, &req); scmd->request = &req; - memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); scmd->cmnd = req.cmd; @@ -1804,8 +1742,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) scmd->sc_data_direction = DMA_BIDIRECTIONAL; - init_timer(&scmd->eh_timeout); - spin_lock_irqsave(shost->host_lock, flags); shost->tmf_in_progress = 1; spin_unlock_irqrestore(shost->host_lock, flags); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 62307bd794a..e7686500e9d 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1181,7 +1181,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) cmd->transfersize = req->data_len; cmd->allowed = req->retries; - cmd->timeout_per_command = req->timeout; return BLKPREP_OK; } EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); @@ -1416,17 +1415,26 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) 
spin_unlock(shost->host_lock); spin_lock(sdev->request_queue->queue_lock); - __scsi_done(cmd); + blk_complete_request(req); } static void scsi_softirq_done(struct request *rq) { - struct scsi_cmnd *cmd = rq->completion_data; - unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command; + struct scsi_cmnd *cmd = rq->special; + unsigned long wait_for = (cmd->allowed + 1) * rq->timeout; int disposition; INIT_LIST_HEAD(&cmd->eh_entry); + /* + * Set the serial numbers back to zero + */ + cmd->serial_number = 0; + + atomic_inc(&cmd->device->iodone_cnt); + if (cmd->result) + atomic_inc(&cmd->device->ioerr_cnt); + disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { @@ -1675,6 +1683,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) blk_queue_prep_rq(q, scsi_prep_fn); blk_queue_softirq_done(q, scsi_softirq_done); + blk_queue_rq_timed_out(q, scsi_times_out); return q; } diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 79f0f751120..6cddd5dd323 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -4,6 +4,7 @@ #include struct request_queue; +struct request; struct scsi_cmnd; struct scsi_device; struct scsi_host_template; @@ -27,7 +28,6 @@ extern void scsi_exit_hosts(void); extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd); extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); -extern void __scsi_done(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd); void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); @@ -49,10 +49,7 @@ extern int __init scsi_init_devinfo(void); extern void scsi_exit_devinfo(void); /* scsi_error.c */ -extern void scsi_add_timer(struct scsi_cmnd *, int, - void (*)(struct scsi_cmnd *)); -extern int scsi_delete_timer(struct scsi_cmnd *); -extern void scsi_times_out(struct 
scsi_cmnd *cmd); +extern enum blk_eh_timer_return scsi_times_out(struct request *req); extern int scsi_error_handler(void *host); extern int scsi_decide_disposition(struct scsi_cmnd *cmd); extern void scsi_eh_wakeup(struct Scsi_Host *shost); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index ab3c71869be..7f618ee5ece 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -560,12 +560,15 @@ sdev_rd_attr (vendor, "%.8s\n"); sdev_rd_attr (model, "%.16s\n"); sdev_rd_attr (rev, "%.4s\n"); +/* + * TODO: can we make these symlinks to the block layer ones? + */ static ssize_t sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); - return snprintf (buf, 20, "%d\n", sdev->timeout / HZ); + return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ); } static ssize_t @@ -576,7 +579,7 @@ sdev_store_timeout (struct device *dev, struct device_attribute *attr, int timeout; sdev = to_scsi_device(dev); sscanf (buf, "%d\n", &timeout); - sdev->timeout = timeout * HZ; + blk_queue_rq_timeout(sdev->request_queue, timeout * HZ); return count; } static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 56823fd1fb8..9168883d0df 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -1950,15 +1950,15 @@ static int fc_vport_match(struct attribute_container *cont, * Notes: * This routine assumes no locks are held on entry. 
*/ -static enum scsi_eh_timer_return +static enum blk_eh_timer_return fc_timed_out(struct scsi_cmnd *scmd) { struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); if (rport->port_state == FC_PORTSTATE_BLOCKED) - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb115d1bf22..c0cf4acda7d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -383,7 +383,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) sector_t block = rq->sector; sector_t threshold; unsigned int this_count = rq->nr_sectors; - unsigned int timeout = sdp->timeout; int ret; if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { @@ -584,7 +583,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) SCpnt->transfersize = sdp->sector_size; SCpnt->underflow = this_count << 9; SCpnt->allowed = SD_MAX_RETRIES; - SCpnt->timeout_per_command = timeout; /* * This indicates that the command is ready from our end to be @@ -1878,11 +1876,12 @@ static int sd_probe(struct device *dev) sdkp->openers = 0; sdkp->previous_state = 1; - if (!sdp->timeout) { + if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) - sdp->timeout = SD_TIMEOUT; + blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); else - sdp->timeout = SD_MOD_TIMEOUT; + blk_queue_rq_timeout(sdp->request_queue, + SD_MOD_TIMEOUT); } device_initialize(&sdkp->dev); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 8dbe3798d5f..0f17009c99d 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -331,7 +331,7 @@ static int sr_done(struct scsi_cmnd *SCpnt) static int sr_prep_fn(struct request_queue *q, struct request *rq) { - int block=0, this_count, s_size, timeout = SR_TIMEOUT; + int block = 0, this_count, s_size; struct scsi_cd *cd; struct scsi_cmnd *SCpnt; struct scsi_device *sdp = q->queuedata; @@ -461,7 +461,6 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) 
SCpnt->transfersize = cd->device->sector_size; SCpnt->underflow = this_count << 9; SCpnt->allowed = MAX_RETRIES; - SCpnt->timeout_per_command = timeout; /* * This indicates that the command is ready from our end to be @@ -620,6 +619,8 @@ static int sr_probe(struct device *dev) disk->fops = &sr_bdops; disk->flags = GENHD_FL_CD; + blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); + cd->device = sdev; cd->disk = disk; cd->driver = &sr_template; diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index d39107b7669..f4e6cde1fd0 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -519,8 +519,8 @@ static int sym53c8xx_queue_command(struct scsi_cmnd *cmd, * Shorten our settle_time if needed for * this command not to time out. */ - if (np->s.settle_time_valid && cmd->timeout_per_command) { - unsigned long tlimit = jiffies + cmd->timeout_per_command; + if (np->s.settle_time_valid && cmd->request->timeout) { + unsigned long tlimit = jiffies + cmd->request->timeout; tlimit -= SYM_CONF_TIMER_INTERVAL*2; if (time_after(np->s.settle_time, tlimit)) { np->s.settle_time = tlimit; -- cgit v1.2.3 From 224cb3e981f1b2f9f93dbd49eaef505d17d894c2 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Fri, 29 Aug 2008 09:36:09 +0200 Subject: dm: Call blk_abort_queue on failed paths Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- drivers/md/dm-mpath.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2fcf28b4c7..3d3848132c6 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -33,6 +33,7 @@ struct pgpath { unsigned fail_count; /* Cumulative failure count */ struct dm_path path; + struct work_struct deactivate_path; }; #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) @@ -112,6 +113,7 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void 
process_queued_ios(struct work_struct *work); static void trigger_event(struct work_struct *work); static void activate_path(struct work_struct *work); +static void deactivate_path(struct work_struct *work); /*----------------------------------------------- @@ -122,8 +124,10 @@ static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); - if (pgpath) + if (pgpath) { pgpath->path.is_active = 1; + INIT_WORK(&pgpath->deactivate_path, deactivate_path); + } return pgpath; } @@ -133,6 +137,14 @@ static void free_pgpath(struct pgpath *pgpath) kfree(pgpath); } +static void deactivate_path(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, deactivate_path); + + blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue); +} + static struct priority_group *alloc_priority_group(void) { struct priority_group *pg; @@ -870,6 +882,7 @@ static int fail_path(struct pgpath *pgpath) pgpath->path.dev->name, m->nr_valid_paths); queue_work(kmultipathd, &m->trigger_event); + queue_work(kmultipathd, &pgpath->deactivate_path); out: spin_unlock_irqrestore(&m->lock, flags); -- cgit v1.2.3 From a91a3a20e06621b9931793888583efe37db4e4e8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:01 +0900 Subject: sg: rename sg_cmd_done sg_rq_end_io The old sg_rq_end_io() was used to wrap sg_cmd_done while converting sg to use the block layer (in order to cover the difference between scsi_execute_async and blk_execute_rq_nowait). Now we don't need it so let's remove it. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 50c07bca727..d18f90d1d9a 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ static int sg_fasync(int fd, struct file *filp, int mode); /* tasklet or soft irq callback */ -static void sg_cmd_done(void *data, char *sense, int result, int resid); +static void sg_rq_end_io(struct request *rq, int uptodate); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static void sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); @@ -227,11 +227,6 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd) cmd, filp->f_mode & FMODE_WRITE); } -static void sg_rq_end_io(struct request *rq, int uptodate) -{ - sg_cmd_done(rq->end_io_data, rq->sense, rq->errors, rq->data_len); -} - static int sg_open(struct inode *inode, struct file *filp) { @@ -1257,16 +1252,19 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -/* This function is a "bottom half" handler that is called by the - * mid level when a command is completed (or has failed). */ -static void -sg_cmd_done(void *data, char *sense, int result, int resid) +/* + * This function is a "bottom half" handler that is called by the mid + * level when a command is completed (or has failed). 
+ */ +static void sg_rq_end_io(struct request *rq, int uptodate) { - Sg_request *srp = data; + struct sg_request *srp = rq->end_io_data; Sg_device *sdp = NULL; Sg_fd *sfp; unsigned long iflags; unsigned int ms; + char *sense; + int result, resid; if (NULL == srp) { printk(KERN_ERR "sg_cmd_done: NULL request\n"); @@ -1280,6 +1278,9 @@ sg_cmd_done(void *data, char *sense, int result, int resid) return; } + sense = rq->sense; + result = rq->errors; + resid = rq->data_len; SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n", sdp->disk->disk_name, srp->header.pack_id, result)); -- cgit v1.2.3 From 7e56cb0f7e7a132803ffefa0a5a15fb2079afaf1 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:02 +0900 Subject: sg: remove SG_ALLOW_DIO_CODE define sg had lots of its own functions for the direct IO but now sg uses the block layer functions for it. There are only five lines for the direct IO. The SG_ALLOW_DIO_CODE define was used to compile out the direct IO code but we don't need the define. If someone wants to remove the direct IO code, they can do so easily without the define. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d18f90d1d9a..2c30331abbe 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -68,7 +68,6 @@ static void sg_proc_cleanup(void); #endif #define SG_ALLOW_DIO_DEF 0 -#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */ #define SG_MAX_DEVS 32768 @@ -1674,13 +1673,12 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; -#ifdef SG_ALLOW_DIO_CODE if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && (!sfp->parentdp->device->host->unchecked_isa_dma) && blk_rq_aligned(q, hp->dxferp, dxfer_len)) return sg_build_direct(srp, sfp, dxfer_len); -#endif + if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) sg_link_reserve(sfp, srp, dxfer_len); else -- cgit v1.2.3 From fd1c1de0766844af4cfc39298e109ad273e72a9e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:03 +0900 Subject: sg: remove b_malloc_len in sg_scatter_hold struct It's not used for anything useful after the block layer conversion. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2c30331abbe..ccce31a400e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -116,7 +116,6 @@ typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ - unsigned b_malloc_len; /* actual len malloc'ed in buffer */ struct page **pages; int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ @@ -1986,7 +1985,6 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; - req_schp->b_malloc_len = rsv_schp->b_malloc_len; req_schp->page_order = rsv_schp->page_order; break; } else -- cgit v1.2.3 From 44c7b0eaa041007066e30ab4869d5bbf8dad5989 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:04 +0900 Subject: sg: remove __sg_start_req __sg_start_req() was used temporarily to call blk_get_request() during converting sg to use the block layer. Now sg always calls blk_get_request() so we can move blk_get_request() to sg_start_req(). We don't need __sg_start_req anymore. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ccce31a400e..9a56c0d320b 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1626,14 +1626,23 @@ exit_sg(void) idr_destroy(&sg_index_idr); } -static int __sg_start_req(struct sg_request *srp, struct sg_io_hdr *hp, - unsigned char *cmd) +static int sg_start_req(Sg_request *srp, unsigned char *cmd) { - struct sg_fd *sfp = srp->parentfp; - struct request_queue *q = sfp->parentdp->device->request_queue; + int res = 0; struct request *rq; + Sg_fd *sfp = srp->parentfp; + sg_io_hdr_t *hp = &srp->header; + int dxfer_len = (int) hp->dxfer_len; + int dxfer_dir = hp->dxfer_direction; + Sg_scatter_hold *req_schp = &srp->data; + Sg_scatter_hold *rsv_schp = &sfp->reserve; + struct request_queue *q = sfp->parentdp->device->request_queue; + struct rq_map_data map_data; int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? 
WRITE : READ; + SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n", + dxfer_len)); + rq = blk_get_request(q, rw, GFP_ATOMIC); if (!rq) return -ENOMEM; @@ -1648,27 +1657,6 @@ static int __sg_start_req(struct sg_request *srp, struct sg_io_hdr *hp, rq->sense = srp->sense_b; rq->retries = SG_DEFAULT_RETRIES; - return 0; -} - -static int sg_start_req(Sg_request *srp, unsigned char *cmd) -{ - int res; - Sg_fd *sfp = srp->parentfp; - sg_io_hdr_t *hp = &srp->header; - int dxfer_len = (int) hp->dxfer_len; - int dxfer_dir = hp->dxfer_direction; - Sg_scatter_hold *req_schp = &srp->data; - Sg_scatter_hold *rsv_schp = &sfp->reserve; - struct request_queue *q = sfp->parentdp->device->request_queue; - struct rq_map_data map_data; - - SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); - - res = __sg_start_req(srp, hp, cmd); - if (res) - return res; - if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; -- cgit v1.2.3 From 626710c9d665ff381c7ec666b6a023f064ca5fef Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:05 +0900 Subject: sg: incorporate sg_build_direct into sg_start_req Calling blk_rq_map_user() in a single place is better than in two different places. It makes the code more understandable. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 80 +++++++++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9a56c0d320b..c0b6866eece 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -202,7 +202,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static int sg_res_in_use(Sg_fd * sfp); -static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len); static Sg_device *sg_get_dev(int dev); #ifdef CONFIG_SCSI_PROC_FS static int sg_last_dev(void); @@ -1628,16 +1627,17 @@ exit_sg(void) static int sg_start_req(Sg_request *srp, unsigned char *cmd) { - int res = 0; + int res; struct request *rq; Sg_fd *sfp = srp->parentfp; sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; + unsigned int iov_count = hp->iovec_count; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; - struct rq_map_data map_data; + struct rq_map_data *md, map_data; int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? 
WRITE : READ; SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n", @@ -1660,38 +1660,43 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; - if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && - (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && - (!sfp->parentdp->device->host->unchecked_isa_dma) && + if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && + dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && + !sfp->parentdp->device->host->unchecked_isa_dma && blk_rq_aligned(q, hp->dxferp, dxfer_len)) - return sg_build_direct(srp, sfp, dxfer_len); + md = NULL; + else + md = &map_data; + + if (md) { + if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen) + sg_link_reserve(sfp, srp, dxfer_len); + else { + res = sg_build_indirect(req_schp, sfp, dxfer_len); + if (res) + return res; + } - if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) - sg_link_reserve(sfp, srp, dxfer_len); + md->pages = req_schp->pages; + md->page_order = req_schp->page_order; + md->nr_entries = req_schp->k_use_sg; + } + + if (iov_count) + res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, iov_count, + hp->dxfer_len, GFP_ATOMIC); else - res = sg_build_indirect(req_schp, sfp, dxfer_len); + res = blk_rq_map_user(q, rq, md, hp->dxferp, + hp->dxfer_len, GFP_ATOMIC); if (!res) { - struct request *rq = srp->rq; - Sg_scatter_hold *schp = &srp->data; - int iovec_count = (int) hp->iovec_count; - - map_data.pages = schp->pages; - map_data.page_order = schp->page_order; - map_data.nr_entries = schp->k_use_sg; - - if (iovec_count) - res = blk_rq_map_user_iov(q, rq, &map_data, hp->dxferp, - iovec_count, - hp->dxfer_len, GFP_ATOMIC); - else - res = blk_rq_map_user(q, rq, &map_data, hp->dxferp, - hp->dxfer_len, GFP_ATOMIC); + srp->bio = rq->bio; - if (!res) - srp->bio = rq->bio; + if (!md) { + req_schp->dio_in_use = 1; + hp->info |= SG_INFO_DIRECT_IO; + } } - return res; } @@ -1730,25 +1735,6 @@ 
sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) return tablesize; /* number of scat_gath elements allocated */ } -/* Returns: -ve -> error, 0 -> done, 1 -> try indirect */ -static int -sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int res; - struct request *rq = srp->rq; - struct request_queue *q = sfp->parentdp->device->request_queue; - - res = blk_rq_map_user(q, rq, NULL, hp->dxferp, dxfer_len, GFP_ATOMIC); - if (res) - return res; - srp->bio = rq->bio; - schp->dio_in_use = 1; - hp->info |= SG_INFO_DIRECT_IO; - return 0; -} - static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { -- cgit v1.2.3 From c3919af2354fff673026dcbeac6f009d2ce5ceee Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:06 +0900 Subject: sg: remove sg_write_xfer sg_write_xfer was used to copy data from user space for WRITE commands. blk_rq_map_user_iov and blk_rq_map_user do the job so sg_write_xfer does nothing useful. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index c0b6866eece..07bd6833130 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -188,7 +188,6 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_write_xfer(Sg_request * srp); static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_scatter_hold * schp); @@ -736,11 +735,6 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, sg_finish_rem_req(srp); return k; /* probably out of space --> ENOMEM */ } - if ((k = sg_write_xfer(srp))) { - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n")); - sg_finish_rem_req(srp); - return k; - } if (sdp->detached) { sg_finish_rem_req(srp); return -ENODEV; @@ -1816,32 +1810,6 @@ out: return -ENOMEM; } -static int -sg_write_xfer(Sg_request * srp) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int num_xfer = 0; - int dxfer_dir = hp->dxfer_direction; - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; - - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) || - (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { - num_xfer = (int) (new_interface ? 
hp->dxfer_len : hp->flags); - if (schp->bufflen < num_xfer) - num_xfer = schp->bufflen; - } - if ((num_xfer <= 0) || (schp->dio_in_use) || - (new_interface - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) - return 0; - - SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, k_use_sg=%d\n", - num_xfer, schp->k_use_sg)); - - return 0; -} - static void sg_remove_scat(Sg_scatter_hold * schp) { -- cgit v1.2.3 From 0b6cb26c6686f1f24607c41f0a6d21ce54191710 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:07 +0900 Subject: sg: remove sg_read_xfer sg_read_xfer was used to copy data to user space for READ commands. blk_rq_unmap_user does the job so sg_read_xfer does nothing useful. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 07bd6833130..df8bf67b171 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -188,7 +188,6 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); @@ -523,8 +522,11 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) err = -EFAULT; goto err_out; } - err = sg_read_xfer(srp); - err_out: + if (srp->bio) { + err = blk_rq_unmap_user(srp->bio); + srp->bio = NULL; + } +err_out: sg_finish_rem_req(srp); return (0 == err) ? 
count : err; } @@ -1831,31 +1833,6 @@ sg_remove_scat(Sg_scatter_hold * schp) memset(schp, 0, sizeof (*schp)); } -static int -sg_read_xfer(Sg_request * srp) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int num_xfer = 0; - int dxfer_dir = hp->dxfer_direction; - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; - - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir) - || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { - num_xfer = hp->dxfer_len; - if (schp->bufflen < num_xfer) - num_xfer = schp->bufflen; - } - if ((num_xfer <= 0) || (schp->dio_in_use) || - (new_interface - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) - return 0; - - SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, (int)hp->iovec_count, schp->k_use_sg)); - return 0; -} - static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { -- cgit v1.2.3 From 4677735f03f5b6b6f2182f457a921855cadfb85b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 2 Sep 2008 22:50:08 +0900 Subject: sg: remove unnecessary blk_rq_unmap_user blk_rq_unmap_user in sg_finish_rem_req can take care of all the cases. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index df8bf67b171..ba9b9bbd4e7 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -522,10 +522,6 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) err = -EFAULT; goto err_out; } - if (srp->bio) { - err = blk_rq_unmap_user(srp->bio); - srp->bio = NULL; - } err_out: sg_finish_rem_req(srp); return (0 == err) ? 
count : err; @@ -1844,9 +1840,6 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) if ((!outp) || (num_read_xfer <= 0)) return 0; - blk_rq_unmap_user(srp->bio); - srp->bio = NULL; - num = 1 << (PAGE_SHIFT + schp->page_order); for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { -- cgit v1.2.3 From 9246b5f06deeea541e7c62437c2ad19a0b1172c0 Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 17 Sep 2008 14:30:32 -0700 Subject: block: Expand Xen blkfront for > 16 xvd Until recently, the maximum number of xvd block devices you could attach to a Xen domU was 16. This limitation turned out to be problematic for some users, so it was expanded to handle a much larger number of disks. However, this requires a couple of changes in the way that blkfront scans for disks. This functionality is already present in the Xen linux-2.6.18-xen.hg tree; the attached patch adds this functionality to the mainline xen-blkfront implementation. I successfully tested it on a 2.6.25 tree, and build tested it on 2.6.27-rc3. 
Signed-off-by: Chris Lalancette Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 76 ++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3ca643cafcc..bff602ccccf 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -105,15 +105,17 @@ static DEFINE_SPINLOCK(blkif_io_lock); #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 +#define PARTS_PER_EXT_DISK 256 #define BLKIF_MAJOR(dev) ((dev)>>8) #define BLKIF_MINOR(dev) ((dev) & 0xff) -#define DEV_NAME "xvd" /* name in /dev */ +#define EXT_SHIFT 28 +#define EXTENDED (1<gd != NULL); BUG_ON(info->rq != NULL); - if ((minor % PARTS_PER_DISK) == 0) - nr_minors = PARTS_PER_DISK; + if ((info->vdevice>>EXT_SHIFT) > 1) { + /* this is above the extended range; something is wrong */ + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); + return -ENODEV; + } + + if (!VDEV_IS_EXTENDED(info->vdevice)) { + minor = BLKIF_MINOR(info->vdevice); + nr_parts = PARTS_PER_DISK; + } else { + minor = BLKIF_MINOR_EXT(info->vdevice); + nr_parts = PARTS_PER_EXT_DISK; + } + + if ((minor % nr_parts) == 0) + nr_minors = nr_parts; gd = alloc_disk(nr_minors); if (gd == NULL) goto out; - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", DEV_NAME, - 'a' + minor / PARTS_PER_DISK); - else - sprintf(gd->disk_name, "%s%c%d", DEV_NAME, - 'a' + minor / PARTS_PER_DISK, - minor % PARTS_PER_DISK); + offset = minor / nr_parts; + + if (nr_minors > 1) { + if (offset < 26) + sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); + else + sprintf(gd->disk_name, "%s%c%c", DEV_NAME, + 'a' + ((offset / 26)-1), 'a' + (offset % 26)); + } else { + if (offset < 26) + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, + 'a' + offset, + minor & (nr_parts - 1)); + else + sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, + 'a' + 
((offset / 26) - 1), + 'a' + (offset % 26), + minor & (nr_parts - 1)); + } gd->major = XENVBD_MAJOR; gd->first_minor = minor; @@ -699,8 +730,13 @@ static int blkfront_probe(struct xenbus_device *dev, err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device", "%i", &vdevice); if (err != 1) { - xenbus_dev_fatal(dev, err, "reading virtual-device"); - return err; + /* go looking in the extended area instead */ + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", + "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } } info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -861,9 +897,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = 0; - err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), - sectors, info->vdevice, - binfo, sector_size, info); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); -- cgit v1.2.3 From 905bd78f2188da69e74966918e3d71df3dff382b Mon Sep 17 00:00:00 2001 From: "scameron@beardog.cca.cpqcorp.net" Date: Fri, 19 Sep 2008 18:27:47 -0700 Subject: cciss: Fix cciss SCSI rescan code to better notice device changes Fix cciss SCSI rescan code to better notice device changes. If you hot-unplug a tape drive, then hot-plug a different tape drive into the same slot in a storage enclosure, the cciss driver wouldn't notice anything had changed, as it was only looking at the LUN address and device type. Now it looks at the inquiry page 0x83 device identifier, and vendor and model strings as well. Signed-off-by: Stephen M. 
Cameron Signed-off-by: Jens Axboe --- drivers/block/cciss_scsi.c | 151 +++++++++++++++++++++++++++++---------------- drivers/block/cciss_scsi.h | 4 ++ 2 files changed, 102 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e1233aabda7..a3fd87b4144 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -365,7 +365,7 @@ struct scsi2map { static int cciss_scsi_add_entry(int ctlr, int hostno, - unsigned char *scsi3addr, int devtype, + struct cciss_scsi_dev_t *device, struct scsi2map *added, int *nadded) { /* assumes hba[ctlr]->scsi_ctlr->lock is held */ @@ -384,12 +384,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, lun = 0; /* Is this device a non-zero lun of a multi-lun device */ /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */ - if (scsi3addr[4] != 0) { + if (device->scsi3addr[4] != 0) { /* Search through our list and find the device which */ /* has the same 8 byte LUN address, excepting byte 4. */ /* Assign the same bus and target for this new LUN. */ /* Use the logical unit number from the firmware. 
*/ - memcpy(addr1, scsi3addr, 8); + memcpy(addr1, device->scsi3addr, 8); addr1[4] = 0; for (i = 0; i < n; i++) { sd = &ccissscsi[ctlr].dev[i]; @@ -399,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno, if (memcmp(addr1, addr2, 8) == 0) { bus = sd->bus; target = sd->target; - lun = scsi3addr[4]; + lun = device->scsi3addr[4]; break; } } @@ -420,8 +420,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, added[*nadded].lun = sd->lun; (*nadded)++; - memcpy(&sd->scsi3addr[0], scsi3addr, 8); - sd->devtype = devtype; + memcpy(sd->scsi3addr, device->scsi3addr, 8); + memcpy(sd->vendor, device->vendor, sizeof(sd->vendor)); + memcpy(sd->revision, device->revision, sizeof(sd->revision)); + memcpy(sd->device_id, device->device_id, sizeof(sd->device_id)); + sd->devtype = device->devtype; + ccissscsi[ctlr].ndevices++; /* initially, (before registering with scsi layer) we don't @@ -487,6 +491,22 @@ static void fixup_botched_add(int ctlr, char *scsi3addr) CPQ_TAPE_UNLOCK(ctlr, flags); } +static int device_is_the_same(struct cciss_scsi_dev_t *dev1, + struct cciss_scsi_dev_t *dev2) +{ + return dev1->devtype == dev2->devtype && + memcmp(dev1->scsi3addr, dev2->scsi3addr, + sizeof(dev1->scsi3addr)) == 0 && + memcmp(dev1->device_id, dev2->device_id, + sizeof(dev1->device_id)) == 0 && + memcmp(dev1->vendor, dev2->vendor, + sizeof(dev1->vendor)) == 0 && + memcmp(dev1->model, dev2->model, + sizeof(dev1->model)) == 0 && + memcmp(dev1->revision, dev2->revision, + sizeof(dev1->revision)) == 0; +} + static int adjust_cciss_scsi_table(int ctlr, int hostno, struct cciss_scsi_dev_t sd[], int nsds) @@ -532,7 +552,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, for (j=0;jscsi3addr)) { - if (sd[j].devtype == csd->devtype) + if (device_is_the_same(&sd[j], csd)) found=2; else found=1; @@ -548,22 +568,26 @@ adjust_cciss_scsi_table(int ctlr, int hostno, cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - } - else if (found == 1) { /* device is 
different kind */ + } else if (found == 1) { /* device is different in some way */ changes++; - printk("cciss%d: device c%db%dt%dl%d type changed " - "(device type now %s).\n", - ctlr, hostno, csd->bus, csd->target, csd->lun, - scsi_device_type(csd->devtype)); + printk("cciss%d: device c%db%dt%dl%d has changed.\n", + ctlr, hostno, csd->bus, csd->target, csd->lun); cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - if (cciss_scsi_add_entry(ctlr, hostno, - &sd[j].scsi3addr[0], sd[j].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[j], added, &nadded) != 0) /* we just removed one, so add can't fail. */ BUG(); csd->devtype = sd[j].devtype; + memcpy(csd->device_id, sd[j].device_id, + sizeof(csd->device_id)); + memcpy(csd->vendor, sd[j].vendor, + sizeof(csd->vendor)); + memcpy(csd->model, sd[j].model, + sizeof(csd->model)); + memcpy(csd->revision, sd[j].revision, + sizeof(csd->revision)); } else /* device is same as it ever was, */ i++; /* so just move along. */ } @@ -577,7 +601,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, csd = &ccissscsi[ctlr].dev[j]; if (SCSI3ADDR_EQ(sd[i].scsi3addr, csd->scsi3addr)) { - if (sd[i].devtype == csd->devtype) + if (device_is_the_same(&sd[i], csd)) found=2; /* found device */ else found=1; /* found a bug. */ @@ -586,16 +610,14 @@ adjust_cciss_scsi_table(int ctlr, int hostno, } if (!found) { changes++; - if (cciss_scsi_add_entry(ctlr, hostno, - - &sd[i].scsi3addr[0], sd[i].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[i], added, &nadded) != 0) break; } else if (found == 1) { /* should never happen... 
*/ changes++; - printk("cciss%d: device unexpectedly changed type\n", - ctlr); + printk(KERN_WARNING "cciss%d: device " + "unexpectedly changed\n", ctlr); /* but if it does happen, we just ignore that device */ } } @@ -1012,7 +1034,8 @@ cciss_scsi_interpret_error(CommandList_struct *cp) static int cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, - unsigned char *buf, unsigned char bufsize) + unsigned char page, unsigned char *buf, + unsigned char bufsize) { int rc; CommandList_struct *cp; @@ -1032,8 +1055,8 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, ei = cp->err_info; cdb[0] = CISS_INQUIRY; - cdb[1] = 0; - cdb[2] = 0; + cdb[1] = (page != 0); + cdb[2] = page; cdb[3] = 0; cdb[4] = bufsize; cdb[5] = 0; @@ -1053,6 +1076,25 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, return rc; } +/* Get the device id from inquiry page 0x83 */ +static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr, + unsigned char *device_id, int buflen) +{ + int rc; + unsigned char *buf; + + if (buflen > 16) + buflen = 16; + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return -1; + rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64); + if (rc == 0) + memcpy(device_id, &buf[8], buflen); + kfree(buf); + return rc != 0; +} + static int cciss_scsi_do_report_phys_luns(ctlr_info_t *c, ReportLunData_struct *buf, int bufsize) @@ -1142,25 +1184,21 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) ctlr_info_t *c; __u32 num_luns=0; unsigned char *ch; - /* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */ - struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; + struct cciss_scsi_dev_t *currentsd, *this_device; int ncurrent=0; int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8; int i; c = (ctlr_info_t *) hba[cntl_num]; ld_buff = kzalloc(reportlunsize, GFP_KERNEL); - if (ld_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - return; - } inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, 
GFP_KERNEL); - if (inq_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - kfree(ld_buff); - return; + currentsd = kzalloc(sizeof(*currentsd) * + (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL); + if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) { + printk(KERN_ERR "cciss: out of memory\n"); + goto out; } - + this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) { ch = &ld_buff->LUNListLength[0]; num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; @@ -1179,23 +1217,34 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) /* adjust our table of devices */ - for(i=0; iLUN[i][3] & 0xC0) continue; memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE); memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8); - if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff, - (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { + if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff, + (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) /* Inquiry failed (msg printed already) */ - devtype = 0; /* so we will skip this device. */ - } else /* what kind of device is this? */ - devtype = (inq_buff[0] & 0x1f); - - switch (devtype) + continue; /* so we will skip this device.
*/ + + this_device->devtype = (inq_buff[0] & 0x1f); + this_device->bus = -1; + this_device->target = -1; + this_device->lun = -1; + memcpy(this_device->scsi3addr, scsi3addr, 8); + memcpy(this_device->vendor, &inq_buff[8], + sizeof(this_device->vendor)); + memcpy(this_device->model, &inq_buff[16], + sizeof(this_device->model)); + memcpy(this_device->revision, &inq_buff[32], + sizeof(this_device->revision)); + memset(this_device->device_id, 0, + sizeof(this_device->device_id)); + cciss_scsi_get_device_id(hba[cntl_num], scsi3addr, + this_device->device_id, sizeof(this_device->device_id)); + + switch (this_device->devtype) { case 0x05: /* CD-ROM */ { @@ -1220,15 +1269,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { printk(KERN_INFO "cciss%d: %s ignored, " "too many devices.\n", cntl_num, - scsi_device_type(devtype)); + scsi_device_type(this_device->devtype)); break; } - memcpy(&currentsd[ncurrent].scsi3addr[0], - &scsi3addr[0], 8); - currentsd[ncurrent].devtype = devtype; - currentsd[ncurrent].bus = -1; - currentsd[ncurrent].target = -1; - currentsd[ncurrent].lun = -1; + currentsd[ncurrent] = *this_device; ncurrent++; break; default: @@ -1240,6 +1284,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) out: kfree(inq_buff); kfree(ld_buff); + kfree(currentsd); return; } diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h index d9c2c586502..7b750245ae7 100644 --- a/drivers/block/cciss_scsi.h +++ b/drivers/block/cciss_scsi.h @@ -66,6 +66,10 @@ struct cciss_scsi_dev_t { int devtype; int bus, target, lun; /* as presented to the OS */ unsigned char scsi3addr[8]; /* as presented to the HW */ + unsigned char device_id[16]; /* from inquiry pg.
0x83 */ + unsigned char vendor[8]; /* bytes 8-15 of inquiry data */ + unsigned char model[16]; /* bytes 16-31 of inquiry data */ + unsigned char revision[4]; /* bytes 32-35 of inquiry data */ }; struct cciss_scsi_hba_t { -- cgit v1.2.3 From 061837bc8687edc2739ef02f721b7ae0b8076390 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 22 Sep 2008 14:57:16 -0700 Subject: drivers/block: Use DIV_ROUND_UP The kernel.h macro DIV_ROUND_UP performs the computation (((n) + (d) - 1) / (d)) but is perhaps more readable. An extract of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @haskernel@ @@ #include <linux/kernel.h> @depends on haskernel@ expression n,d; @@ ( - (n + d - 1) / d + DIV_ROUND_UP(n,d) | - (n + (d - 1)) / d + DIV_ROUND_UP(n,d) ) @depends on haskernel@ expression n,d; @@ - DIV_ROUND_UP((n),d) + DIV_ROUND_UP(n,d) @depends on haskernel@ expression n,d; @@ - DIV_ROUND_UP(n,(d)) + DIV_ROUND_UP(n,d) // Signed-off-by: Julia Lawall Cc: Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 8 ++++---- drivers/block/cpqarray.c | 2 +- drivers/block/floppy.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b73116ef923..1e1f9153000 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3460,8 +3460,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->intr[SIMPLE_MODE_INT], dac ?
"" : " not"); hba[i]->cmd_pool_bits = - kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); + kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long), GFP_KERNEL); hba[i]->cmd_pool = (CommandList_struct *) pci_alloc_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct), @@ -3493,8 +3493,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* command and error info recs zeroed out before they are used */ memset(hba[i]->cmd_pool_bits, 0, - ((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long)); + DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long)); hba[i]->num_luns = 0; hba[i]->highest_lun = -1; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 09c14341e6e..3d967525e9a 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -424,7 +424,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), &(hba[i]->cmd_pool_dhandle)); hba[i]->cmd_pool_bits = kcalloc( - (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long), + DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long), GFP_KERNEL); if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 395f8ea7981..9c0b494f5e8 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1355,20 +1355,20 @@ static void fdc_specify(void) } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR; + srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); if (slow_floppy) { srt = srt / 4; } SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = 
hlt_max_code; - hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); if (hut < 0x1) hut = 0x1; else if (hut > 0xf) @@ -2385,7 +2385,7 @@ static void rw_interrupt(void) #ifdef FLOPPY_SANITY_CHECK if (nr_sectors / ssize > - (in_sector_offset + current_count_sectors + ssize - 1) / ssize) { + DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); printk("rs=%d s=%d\n", R_SECTOR, SECTOR); -- cgit v1.2.3 From 9e49184c82e9ec3ab4d45f9ea5a17ccaf43869f0 Mon Sep 17 00:00:00 2001 From: Keith Wansbrough Date: Mon, 22 Sep 2008 14:57:17 -0700 Subject: floppy: support arbitrary first-sector numbers The current floppy_struct allows floppies to number sectors starting from 0 or 1. This patch allows arbitrary first-sector numbers - for example, 0xC1 for Amstrad CPC disks. This extends the existing 1-bit field (FD_ZEROBASED, bit 2 of stretch) to 8 bits (FD_SECTMASK, bits 2 to 9). Currently 0x00 denotes a first sector number of 1, and 0x01 denotes a first sector number of 0. We extend this by interpreting FD_SECTMASK as the first sector number with the LSB flipped. Signed-off-by: Keith Wansbrough Cc: Alain Knaff Cc: Michael Kerrisk Cc: Karel Zak Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9c0b494f5e8..cf64ddf5d83 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -423,8 +423,15 @@ static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical * side 0 is on physical side 0 (but with the misnamed sector IDs). * 'stretch' should probably be renamed to something more general, like - * 'options'. 
Other parameters should be self-explanatory (see also - * setfdprm(8)). + * 'options'. + * + * Bits 2 through 9 of 'stretch' tell the number of the first sector. + * The LSB (bit 2) is flipped. For most disks, the first sector + * is 1 (represented by 0x00<<2). For some CP/M and music sampler + * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2). + * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2). + * + * Other parameters should be self-explanatory (see also setfdprm(8)). */ /* Size @@ -2236,9 +2243,9 @@ static void setup_format_params(int track) } } } - if (_floppy->stretch & FD_ZEROBASED) { + if (_floppy->stretch & FD_SECTBASEMASK) { for (count = 0; count < F_SECT_PER_TRACK; count++) - here[count].sect--; + here[count].sect += FD_SECTBASE(_floppy) - 1; } } @@ -2649,7 +2656,7 @@ static int make_raw_rw_request(void) } HEAD = fsector_t / _floppy->sect; - if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) || + if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect) max_sector = _floppy->sect; @@ -2679,7 +2686,7 @@ static int make_raw_rw_request(void) CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + - ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1); + FD_SECTBASE(_floppy); /* tracksize describes the size which can be filled up with sectors * of size ssize. 
@@ -3311,7 +3318,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, g->head <= 0 || g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || /* check if reserved bits are set */ - (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0) + (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) return -EINVAL; if (type) { if (!capable(CAP_SYS_ADMIN)) @@ -3356,7 +3363,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, if (DRS->maxblock > user_params[drive].sect || DRS->maxtrack || ((user_params[drive].sect ^ oldStretch) & - (FD_SWAPSIDES | FD_ZEROBASED))) + (FD_SWAPSIDES | FD_SECTBASEMASK))) invalidate_drive(bdev); else process_fd_request(); -- cgit v1.2.3 From 8bff7c6b0f63c7ee9c5e3a076338d74125b8debb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Sep 2008 13:05:10 +0200 Subject: libata: set queue SSD flag for SSD devices SSD devices should give an RPM setting of 1 in word 217 of the ID page. If we see such a device, tell the block layer about it. 
Signed-off-by: Jens Axboe --- drivers/ata/libata-scsi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b9d3ba423cb..054370700ab 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -977,6 +977,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); } else { + if (ata_id_is_ssd(dev->id)) + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, + sdev->request_queue); + /* ATA devices must be sector aligned */ blk_queue_update_dma_alignment(sdev->request_queue, ATA_SECT_SIZE - 1); -- cgit v1.2.3 From 8316982ac06d7d8875dc8738efbb030791dc33bb Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:11:20 -0400 Subject: virtio_blk: change to use __blk_end_request() This patch converts virtio_blk to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. Related 'uptodate' argument is converted to 'error'. 
Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Rusty Russell Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 879506a2c23..6ec5fc05278 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -47,20 +47,20 @@ static void blk_done(struct virtqueue *vq) spin_lock_irqsave(&vblk->lock, flags); while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { - int uptodate; + int error; switch (vbr->status) { case VIRTIO_BLK_S_OK: - uptodate = 1; + error = 0; break; case VIRTIO_BLK_S_UNSUPP: - uptodate = -ENOTTY; + error = -ENOTTY; break; default: - uptodate = 0; + error = -EIO; break; } - end_dequeued_request(vbr->req, uptodate); + __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); list_del(&vbr->list); mempool_free(vbr, vblk->pool); } -- cgit v1.2.3 From 2a9df5055a99df25533daf4041fdb99f0ed3463c Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:12:15 -0400 Subject: memstick: change to use __blk_end_request() This patch converts memstick to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. 
Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Alex Dubov Signed-off-by: Jens Axboe --- drivers/memstick/core/mspro_block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 82bf649ef13..6e291bf8237 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -828,7 +828,7 @@ static void mspro_block_submit_req(struct request_queue *q) if (msb->eject) { while ((req = elv_next_request(q)) != NULL) - end_queued_request(req, -ENODEV); + __blk_end_request(req, -ENODEV, blk_rq_bytes(req)); return; } -- cgit v1.2.3 From 7afb3a6e752503d5ebeb038336aa0fa886a51b44 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 10:13:02 -0400 Subject: gdrom: change to use __blk_end_request() This patch converts gdrom to use __blk_end_request() directly so that end_{queued|dequeued}_request() can be removed. gd.transfer is '1' in error cases and '0' in non-error cases, so gdrom hasn't been propagating any error code to the block layer. We can just convert error cases to '-EIO'. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Adrian McMenamin Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1231d95aa69..d6ba77a2dd7 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -624,14 +624,14 @@ static void gdrom_readdisk_dma(struct work_struct *work) ctrl_outb(1, GDROM_DMA_STATUS_REG); wait_event_interruptible_timeout(request_queue, gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); - err = gd.transfer; + err = gd.transfer ? 
-EIO : 0; gd.transfer = 0; gd.pending = 0; /* now seek to take the request spinlock * before handling ending the request */ spin_lock(&gdrom_lock); list_del_init(&req->queuelist); - end_dequeued_request(req, 1 - err); + __blk_end_request(req, err, blk_rq_bytes(req)); } spin_unlock(&gdrom_lock); kfree(read_command); -- cgit v1.2.3 From 6feef531f55cf4a20fd9eb39f5352e5745203603 Mon Sep 17 00:00:00 2001 From: Denis ChengRq Date: Thu, 9 Oct 2008 08:57:05 +0200 Subject: block: mark bio_split_pool static Since all bio_split calls refer to the same single bio_split_pool, the bio_split function can use bio_split_pool directly instead of taking a mempool_t parameter. The mempool_t parameter can then be removed from the bio_split parameter list, and since bio_split_pool is then only referenced in fs/bio.c, it can be marked static. Signed-off-by: Denis ChengRq Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 +- drivers/md/linear.c | 2 +- drivers/md/raid0.c | 2 +- drivers/md/raid10.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e1a90bbb474..0e077150568 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; - bp = bio_split(bio, bio_split_pool, first_sectors); + bp = bio_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index c80ea90593d..b9cbee688fa 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -353,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it.
*/ struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f52f442a735..53508a8a981 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -427,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5f990133f5e..8bdc9bfc288 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -817,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); -- cgit v1.2.3