diff options
Diffstat (limited to 'drivers/scsi/scsi_lib.c')
-rw-r--r-- | drivers/scsi/scsi_lib.c | 361 |
1 files changed, 238 insertions, 123 deletions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 604f4d71793..aac8a02cbe8 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -17,6 +17,7 @@ #include <linux/pci.h> #include <linux/delay.h> #include <linux/hardirq.h> +#include <linux/scatterlist.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -33,35 +34,34 @@ #define SG_MEMPOOL_NR ARRAY_SIZE(scsi_sg_pools) #define SG_MEMPOOL_SIZE 2 +/* + * The maximum number of SG segments that we will put inside a scatterlist + * (unless chaining is used). Should ideally fit inside a single page, to + * avoid a higher order allocation. + */ +#define SCSI_MAX_SG_SEGMENTS 128 + struct scsi_host_sg_pool { size_t size; - char *name; + char *name; struct kmem_cache *slab; mempool_t *pool; }; -#if (SCSI_MAX_PHYS_SEGMENTS < 32) -#error SCSI_MAX_PHYS_SEGMENTS is too small -#endif - -#define SP(x) { x, "sgpool-" #x } +#define SP(x) { x, "sgpool-" #x } static struct scsi_host_sg_pool scsi_sg_pools[] = { SP(8), SP(16), +#if (SCSI_MAX_SG_SEGMENTS > 16) SP(32), -#if (SCSI_MAX_PHYS_SEGMENTS > 32) +#if (SCSI_MAX_SG_SEGMENTS > 32) SP(64), -#if (SCSI_MAX_PHYS_SEGMENTS > 64) +#if (SCSI_MAX_SG_SEGMENTS > 64) SP(128), -#if (SCSI_MAX_PHYS_SEGMENTS > 128) - SP(256), -#if (SCSI_MAX_PHYS_SEGMENTS > 256) -#error SCSI_MAX_PHYS_SEGMENTS is too large -#endif #endif #endif #endif -}; +}; #undef SP static void scsi_run_queue(struct request_queue *q); @@ -288,19 +288,26 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, { struct request_queue *q = rq->q; int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned int data_len = 0, len, bytes, off; + unsigned int data_len = bufflen, len, bytes, off; + struct scatterlist *sg; struct page *page; struct bio *bio = NULL; int i, err, nr_vecs = 0; - for (i = 0; i < nsegs; i++) { - page = sgl[i].page; - off = sgl[i].offset; - len = sgl[i].length; - data_len += len; + for_each_sg(sgl, sg, nsegs, i) { + page = sg->page; + off = sg->offset; + len = sg->length; + data_len += len; - while (len > 0) { + while (len > 0 && data_len > 0) { + /* + * sg sends a scatterlist that is larger than + * the data_len it wants transferred for certain + * IO sizes + */ bytes = min_t(unsigned int, len, PAGE_SIZE - off); + bytes = min(bytes, data_len); if (!bio) { nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); @@ -332,12 +339,13 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, page++; len -= bytes; + data_len -=bytes; off = 0; } } rq->buffer = rq->data = NULL; - rq->data_len = data_len; + rq->data_len = bufflen; return 0; free_bios: @@ -430,6 +438,7 @@ EXPORT_SYMBOL_GPL(scsi_execute_async); static void scsi_init_cmd_errh(struct scsi_cmnd *cmd) { cmd->serial_number = 0; + cmd->resid = 0; memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer); if (cmd->cmd_len == 0) cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]); @@ -688,56 +697,170 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, return NULL; } -struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) -{ - struct scsi_host_sg_pool *sgp; - struct scatterlist *sgl; +/* + * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit + * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. + */ +#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 - BUG_ON(!cmd->use_sg); +static inline unsigned int scsi_sgtable_index(unsigned short nents) +{ + unsigned int index; - switch (cmd->use_sg) { + switch (nents) { case 1 ... 8: - cmd->sglist_len = 0; + index = 0; break; case 9 ... 16: - cmd->sglist_len = 1; + index = 1; break; +#if (SCSI_MAX_SG_SEGMENTS > 16) case 17 ... 32: - cmd->sglist_len = 2; + index = 2; break; -#if (SCSI_MAX_PHYS_SEGMENTS > 32) +#if (SCSI_MAX_SG_SEGMENTS > 32) case 33 ... 64: - cmd->sglist_len = 3; + index = 3; break; -#if (SCSI_MAX_PHYS_SEGMENTS > 64) +#if (SCSI_MAX_SG_SEGMENTS > 64) case 65 ... 128: - cmd->sglist_len = 4; - break; -#if (SCSI_MAX_PHYS_SEGMENTS > 128) - case 129 ... 256: - cmd->sglist_len = 5; + index = 4; break; #endif #endif #endif default: - return NULL; + printk(KERN_ERR "scsi: bad segment count=%d\n", nents); + BUG(); } - sgp = scsi_sg_pools + cmd->sglist_len; - sgl = mempool_alloc(sgp->pool, gfp_mask); - return sgl; + return index; +} + +struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) +{ + struct scsi_host_sg_pool *sgp; + struct scatterlist *sgl, *prev, *ret; + unsigned int index; + int this, left; + + BUG_ON(!cmd->use_sg); + + left = cmd->use_sg; + ret = prev = NULL; + do { + this = left; + if (this > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; + index = SG_MEMPOOL_NR - 1; + } else + index = scsi_sgtable_index(this); + + left -= this; + + sgp = scsi_sg_pools + index; + + sgl = mempool_alloc(sgp->pool, gfp_mask); + if (unlikely(!sgl)) + goto enomem; + + memset(sgl, 0, sizeof(*sgl) * sgp->size); + + /* + * first loop through, set initial index and return value + */ + if (!ret) + ret = sgl; + + /* + * chain previous sglist, if any. we know the previous + * sglist must be the biggest one, or we would not have + * ended up doing another loop. + */ + if (prev) + sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl); + + /* + * don't allow subsequent mempool allocs to sleep, it would + * violate the mempool principle. + */ + gfp_mask &= ~__GFP_WAIT; + gfp_mask |= __GFP_HIGH; + prev = sgl; + } while (left); + + /* + * ->use_sg may get modified after dma mapping has potentially + * shrunk the number of segments, so keep a copy of it for free. + */ + cmd->__use_sg = cmd->use_sg; + return ret; +enomem: + if (ret) { + /* + * Free entries chained off ret. Since we were trying to + * allocate another sglist, we know that all entries are of + * the max size. + */ + sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1; + prev = ret; + ret = &ret[SCSI_MAX_SG_SEGMENTS - 1]; + + while ((sgl = sg_chain_ptr(ret)) != NULL) { + ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1]; + mempool_free(sgl, sgp->pool); + } + + mempool_free(prev, sgp->pool); + } + return NULL; } EXPORT_SYMBOL(scsi_alloc_sgtable); -void scsi_free_sgtable(struct scatterlist *sgl, int index) +void scsi_free_sgtable(struct scsi_cmnd *cmd) { + struct scatterlist *sgl = cmd->request_buffer; struct scsi_host_sg_pool *sgp; - BUG_ON(index >= SG_MEMPOOL_NR); + /* + * if this is the biggest size sglist, check if we have + * chained parts we need to free + */ + if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) { + unsigned short this, left; + struct scatterlist *next; + unsigned int index; + + left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1); + next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]); + while (left && next) { + sgl = next; + this = left; + if (this > SCSI_MAX_SG_SEGMENTS) { + this = SCSI_MAX_SG_SEGMENTS - 1; + index = SG_MEMPOOL_NR - 1; + } else + index = scsi_sgtable_index(this); + + left -= this; + + sgp = scsi_sg_pools + index; + + if (left) + next = sg_chain_ptr(&sgl[sgp->size - 1]); + + mempool_free(sgl, sgp->pool); + } + + /* + * Restore original, will be freed below + */ + sgl = cmd->request_buffer; + sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1; + } else + sgp = scsi_sg_pools + scsi_sgtable_index(cmd->__use_sg); - sgp = scsi_sg_pools + index; mempool_free(sgl, sgp->pool); } @@ -763,7 +886,7 @@ EXPORT_SYMBOL(scsi_free_sgtable); static void scsi_release_buffers(struct scsi_cmnd *cmd) { if (cmd->use_sg) - scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); + scsi_free_sgtable(cmd); /* * Zero these out. They now point to freed memory, and it is @@ -924,11 +1047,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) break; } } - if (!(req->cmd_flags & REQ_QUIET)) { - scmd_printk(KERN_INFO, cmd, - "Device not ready: "); - scsi_print_sense_hdr("", &sshdr); - } + if (!(req->cmd_flags & REQ_QUIET)) + scsi_cmd_print_sense_hdr(cmd, + "Device not ready", + &sshdr); + scsi_end_request(cmd, 0, this_count, 1); return; case VOLUME_OVERFLOW: @@ -962,7 +1085,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) } scsi_end_request(cmd, 0, this_count, !result); } -EXPORT_SYMBOL(scsi_io_completion); /* * Function: scsi_init_io() @@ -978,7 +1100,6 @@ EXPORT_SYMBOL(scsi_io_completion); static int scsi_init_io(struct scsi_cmnd *cmd) { struct request *req = cmd->request; - struct scatterlist *sgpnt; int count; /* @@ -991,14 +1112,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd) /* * If sg table allocation fails, requeue request later. */ - sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); - if (unlikely(!sgpnt)) { + cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC); + if (unlikely(!cmd->request_buffer)) { scsi_unprep_request(req); return BLKPREP_DEFER; } req->buffer = NULL; - cmd->request_buffer = (char *) sgpnt; if (blk_pc_request(req)) cmd->request_bufflen = req->data_len; else @@ -1019,9 +1139,6 @@ static int scsi_init_io(struct scsi_cmnd *cmd) printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors, req->current_nr_sectors); - /* release the command and kill it */ - scsi_release_buffers(cmd); - scsi_put_command(cmd); return BLKPREP_KILL; } @@ -1046,21 +1163,13 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, return cmd; } -static void scsi_blk_pc_done(struct scsi_cmnd *cmd) -{ - BUG_ON(!blk_pc_request(cmd->request)); - /* - * This will complete the whole command with uptodate=1 so - * as far as the block layer is concerned the command completed - * successfully. Since this is a REQ_BLOCK_PC command the - * caller should check the request's errors value - */ - scsi_io_completion(cmd, cmd->request_bufflen); -} - -static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) +int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd; + int ret = scsi_prep_state_check(sdev, req); + + if (ret != BLKPREP_OK) + return ret; cmd = scsi_get_cmd_from_req(sdev, req); if (unlikely(!cmd)) @@ -1103,21 +1212,22 @@ static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) cmd->transfersize = req->data_len; cmd->allowed = req->retries; cmd->timeout_per_command = req->timeout; - cmd->done = scsi_blk_pc_done; return BLKPREP_OK; } +EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); /* * Setup a REQ_TYPE_FS command. These are simple read/write request * from filesystems that still need to be translated to SCSI CDBs from * the ULD. */ -static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) +int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd; - struct scsi_driver *drv; - int ret; + int ret = scsi_prep_state_check(sdev, req); + if (ret != BLKPREP_OK) + return ret; /* * Filesystem requests must transfer data. */ @@ -1127,26 +1237,12 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) if (unlikely(!cmd)) return BLKPREP_DEFER; - ret = scsi_init_io(cmd); - if (unlikely(ret)) - return ret; - - /* - * Initialize the actual SCSI command for this request. - */ - drv = *(struct scsi_driver **)req->rq_disk->private_data; - if (unlikely(!drv->init_command(cmd))) { - scsi_release_buffers(cmd); - scsi_put_command(cmd); - return BLKPREP_KILL; - } - - return BLKPREP_OK; + return scsi_init_io(cmd); } +EXPORT_SYMBOL(scsi_setup_fs_cmnd); -static int scsi_prep_fn(struct request_queue *q, struct request *req) +int scsi_prep_state_check(struct scsi_device *sdev, struct request *req) { - struct scsi_device *sdev = q->queuedata; int ret = BLKPREP_OK; /* @@ -1192,35 +1288,25 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) ret = BLKPREP_KILL; break; } - - if (ret != BLKPREP_OK) - goto out; } + return ret; +} +EXPORT_SYMBOL(scsi_prep_state_check); - switch (req->cmd_type) { - case REQ_TYPE_BLOCK_PC: - ret = scsi_setup_blk_pc_cmnd(sdev, req); - break; - case REQ_TYPE_FS: - ret = scsi_setup_fs_cmnd(sdev, req); - break; - default: - /* - * All other command types are not supported. - * - * Note that these days the SCSI subsystem does not use - * REQ_TYPE_SPECIAL requests anymore. These are only used - * (directly or via blk_insert_request) by non-SCSI drivers. - */ - blk_dump_rq_flags(req, "SCSI bad req"); - ret = BLKPREP_KILL; - break; - } +int scsi_prep_return(struct request_queue *q, struct request *req, int ret) +{ + struct scsi_device *sdev = q->queuedata; - out: switch (ret) { case BLKPREP_KILL: req->errors = DID_NO_CONNECT << 16; + /* release the command and kill it */ + if (req->special) { + struct scsi_cmnd *cmd = req->special; + scsi_release_buffers(cmd); + scsi_put_command(cmd); + req->special = NULL; + } break; case BLKPREP_DEFER: /* @@ -1237,6 +1323,17 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) return ret; } +EXPORT_SYMBOL(scsi_prep_return); + +static int scsi_prep_fn(struct request_queue *q, struct request *req) +{ + struct scsi_device *sdev = q->queuedata; + int ret = BLKPREP_KILL; + + if (req->cmd_type == REQ_TYPE_BLOCK_PC) + ret = scsi_setup_blk_pc_cmnd(sdev, req); + return scsi_prep_return(q, req, ret); +} /* * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else @@ -1546,8 +1643,25 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, if (!q) return NULL; + /* + * this limit is imposed by hardware restrictions + */ blk_queue_max_hw_segments(q, shost->sg_tablesize); - blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS); + + /* + * In the future, sg chaining support will be mandatory and this + * ifdef can then go away. Right now we don't have all archs + * converted, so better keep it safe. + */ +#ifdef ARCH_HAS_SG_CHAIN + if (shost->use_sg_chaining) + blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS); + else + blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#else + blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#endif + blk_queue_max_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); @@ -2210,18 +2324,19 @@ EXPORT_SYMBOL_GPL(scsi_target_unblock); * * Returns virtual address of the start of the mapped page */ -void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, +void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count, size_t *offset, size_t *len) { int i; size_t sg_len = 0, len_complete = 0; + struct scatterlist *sg; struct page *page; WARN_ON(!irqs_disabled()); - for (i = 0; i < sg_count; i++) { + for_each_sg(sgl, sg, sg_count, i) { len_complete = sg_len; /* Complete sg-entries */ - sg_len += sg[i].length; + sg_len += sg->length; if (sg_len > *offset) break; } @@ -2235,10 +2350,10 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, } /* Offset starting from the beginning of first page in this sg-entry */ - *offset = *offset - len_complete + sg[i].offset; + *offset = *offset - len_complete + sg->offset; /* Assumption: contiguous pages can be accessed as "page + i" */ - page = nth_page(sg[i].page, (*offset >> PAGE_SHIFT)); + page = nth_page(sg->page, (*offset >> PAGE_SHIFT)); *offset &= ~PAGE_MASK; /* Bytes in this sg-entry from *offset to the end of the page */ |