Diffstat (limited to 'drivers/scsi/scsi_lib.c')
-rw-r--r--  drivers/scsi/scsi_lib.c  361
1 file changed, 238 insertions, 123 deletions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 604f4d71793..aac8a02cbe8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -17,6 +17,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
+#include <linux/scatterlist.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -33,35 +34,34 @@
#define SG_MEMPOOL_NR ARRAY_SIZE(scsi_sg_pools)
#define SG_MEMPOOL_SIZE 2
+/*
+ * The maximum number of SG segments that we will put inside a scatterlist
+ * (unless chaining is used). Should ideally fit inside a single page, to
+ * avoid a higher order allocation.
+ */
+#define SCSI_MAX_SG_SEGMENTS 128
+
struct scsi_host_sg_pool {
size_t size;
- char *name;
+ char *name;
struct kmem_cache *slab;
mempool_t *pool;
};
-#if (SCSI_MAX_PHYS_SEGMENTS < 32)
-#error SCSI_MAX_PHYS_SEGMENTS is too small
-#endif
-
-#define SP(x) { x, "sgpool-" #x }
+#define SP(x) { x, "sgpool-" #x }
static struct scsi_host_sg_pool scsi_sg_pools[] = {
SP(8),
SP(16),
+#if (SCSI_MAX_SG_SEGMENTS > 16)
SP(32),
-#if (SCSI_MAX_PHYS_SEGMENTS > 32)
+#if (SCSI_MAX_SG_SEGMENTS > 32)
SP(64),
-#if (SCSI_MAX_PHYS_SEGMENTS > 64)
+#if (SCSI_MAX_SG_SEGMENTS > 64)
SP(128),
-#if (SCSI_MAX_PHYS_SEGMENTS > 128)
- SP(256),
-#if (SCSI_MAX_PHYS_SEGMENTS > 256)
-#error SCSI_MAX_PHYS_SEGMENTS is too large
-#endif
#endif
#endif
#endif
-};
+};
#undef SP
static void scsi_run_queue(struct request_queue *q);
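The SP() macro stringizes its size argument into the slab name, so the pool table covers powers of two from 8 up to SCSI_MAX_SG_SEGMENTS. A minimal userspace sketch of what the expansion produces (the descriptor struct and main() driver are illustrative stand-ins, not kernel code):

	#include <stdio.h>

	/* illustrative stand-in for struct scsi_host_sg_pool */
	struct sg_pool_desc {
		unsigned int size;
		const char *name;
	};

	#define SP(x) { x, "sgpool-" #x }
	static const struct sg_pool_desc pools[] = {
		SP(8), SP(16), SP(32), SP(64), SP(128),
	};
	#undef SP

	int main(void)
	{
		for (unsigned int i = 0; i < sizeof(pools) / sizeof(pools[0]); i++)
			printf("pool %u: %3u entries (%s)\n",
			       i, pools[i].size, pools[i].name);
		return 0;
	}

Each command is then served from the smallest pool whose element count covers its segment count, keeping even the largest table within a single page.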
@@ -288,19 +288,26 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
{
struct request_queue *q = rq->q;
int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned int data_len = 0, len, bytes, off;
+ unsigned int data_len = bufflen, len, bytes, off;
+ struct scatterlist *sg;
struct page *page;
struct bio *bio = NULL;
int i, err, nr_vecs = 0;
- for (i = 0; i < nsegs; i++) {
- page = sgl[i].page;
- off = sgl[i].offset;
- len = sgl[i].length;
- data_len += len;
+ for_each_sg(sgl, sg, nsegs, i) {
+ page = sg->page;
+ off = sg->offset;
+ len = sg->length;
- while (len > 0) {
+ while (len > 0 && data_len > 0) {
+ /*
+ * sg sends a scatterlist that is larger than
+ * the data_len it wants transferred for certain
+ * IO sizes
+ */
bytes = min_t(unsigned int, len, PAGE_SIZE - off);
+ bytes = min(bytes, data_len);
if (!bio) {
nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
@@ -332,12 +339,13 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
page++;
len -= bytes;
+ data_len -= bytes;
off = 0;
}
}
rq->buffer = rq->data = NULL;
- rq->data_len = data_len;
+ rq->data_len = bufflen;
return 0;
free_bios:
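The switch from sgl[i] indexing to for_each_sg() is what makes chaining workable here: a chained scatterlist is no longer one contiguous array, so indexed iteration would run off the end of the first piece. A hedged userspace mimic of the traversal idea, where bit 0 of a tagged pointer marks a chain entry much as this era's kernel tags the page pointer (all names illustrative):

	#include <stdio.h>
	#include <stdint.h>

	struct sg {
		uintptr_t link;		/* 0 for data, or next-piece pointer | 1 */
		unsigned int length;
	};

	static struct sg *sg_next_mimic(struct sg *s)
	{
		s++;
		if (s->link & 1)	/* chain entry: hop to the next piece */
			s = (struct sg *)(s->link & ~(uintptr_t)1);
		return s;
	}

	int main(void)
	{
		struct sg b[3] = { { 0, 30 }, { 0, 40 }, { 0, 0 } };	/* last slot keeps the final advance in bounds */
		struct sg a[3] = { { 0, 10 }, { 0, 20 }, { (uintptr_t)b | 1, 0 } };
		struct sg *s = a;
		unsigned int i, total = 0;

		for (i = 0; i < 4; i++, s = sg_next_mimic(s))
			total += s->length;	/* 4 data entries across 2 pieces */
		printf("total %u\n", total);	/* prints: total 100 */
		return 0;
	}
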
@@ -430,6 +438,7 @@ EXPORT_SYMBOL_GPL(scsi_execute_async);
static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
{
cmd->serial_number = 0;
+ cmd->resid = 0;
memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
if (cmd->cmd_len == 0)
cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
@@ -688,56 +697,170 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
return NULL;
}
-struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
-{
- struct scsi_host_sg_pool *sgp;
- struct scatterlist *sgl;
+/*
+ * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
+ * is totally arbitrary; a setting of 2048 will get you at least 8MB I/Os.
+ */
+#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048
- BUG_ON(!cmd->use_sg);
+static inline unsigned int scsi_sgtable_index(unsigned short nents)
+{
+ unsigned int index;
- switch (cmd->use_sg) {
+ switch (nents) {
case 1 ... 8:
- cmd->sglist_len = 0;
+ index = 0;
break;
case 9 ... 16:
- cmd->sglist_len = 1;
+ index = 1;
break;
+#if (SCSI_MAX_SG_SEGMENTS > 16)
case 17 ... 32:
- cmd->sglist_len = 2;
+ index = 2;
break;
-#if (SCSI_MAX_PHYS_SEGMENTS > 32)
+#if (SCSI_MAX_SG_SEGMENTS > 32)
case 33 ... 64:
- cmd->sglist_len = 3;
+ index = 3;
break;
-#if (SCSI_MAX_PHYS_SEGMENTS > 64)
+#if (SCSI_MAX_SG_SEGMENTS > 64)
case 65 ... 128:
- cmd->sglist_len = 4;
- break;
-#if (SCSI_MAX_PHYS_SEGMENTS > 128)
- case 129 ... 256:
- cmd->sglist_len = 5;
+ index = 4;
break;
#endif
#endif
#endif
default:
- return NULL;
+ printk(KERN_ERR "scsi: bad segment count=%d\n", nents);
+ BUG();
}
- sgp = scsi_sg_pools + cmd->sglist_len;
- sgl = mempool_alloc(sgp->pool, gfp_mask);
- return sgl;
+ return index;
+}
+
+struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
+{
+ struct scsi_host_sg_pool *sgp;
+ struct scatterlist *sgl, *prev, *ret;
+ unsigned int index;
+ int this, left;
+
+ BUG_ON(!cmd->use_sg);
+
+ left = cmd->use_sg;
+ ret = prev = NULL;
+ do {
+ this = left;
+ if (this > SCSI_MAX_SG_SEGMENTS) {
+ this = SCSI_MAX_SG_SEGMENTS - 1;
+ index = SG_MEMPOOL_NR - 1;
+ } else
+ index = scsi_sgtable_index(this);
+
+ left -= this;
+
+ sgp = scsi_sg_pools + index;
+
+ sgl = mempool_alloc(sgp->pool, gfp_mask);
+ if (unlikely(!sgl))
+ goto enomem;
+
+ memset(sgl, 0, sizeof(*sgl) * sgp->size);
+
+ /*
+ * first time through the loop, record the list head as the return value
+ */
+ if (!ret)
+ ret = sgl;
+
+ /*
+ * chain previous sglist, if any. we know the previous
+ * sglist must be the biggest one, or we would not have
+ * ended up doing another loop.
+ */
+ if (prev)
+ sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+
+ /*
+ * don't allow subsequent mempool allocs to sleep; it would
+ * violate the mempool principle.
+ */
+ gfp_mask &= ~__GFP_WAIT;
+ gfp_mask |= __GFP_HIGH;
+ prev = sgl;
+ } while (left);
+
+ /*
+ * ->use_sg may get modified after dma mapping has potentially
+ * shrunk the number of segments, so keep a copy of it for the free path.
+ */
+ cmd->__use_sg = cmd->use_sg;
+ return ret;
+enomem:
+ if (ret) {
+ /*
+ * Free entries chained off ret. Since we were trying to
+ * allocate another sglist, we know that all entries are of
+ * the max size.
+ */
+ sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+ prev = ret;
+ ret = &ret[SCSI_MAX_SG_SEGMENTS - 1];
+
+ while ((sgl = sg_chain_ptr(ret)) != NULL) {
+ ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1];
+ mempool_free(sgl, sgp->pool);
+ }
+
+ mempool_free(prev, sgp->pool);
+ }
+ return NULL;
}
EXPORT_SYMBOL(scsi_alloc_sgtable);
-void scsi_free_sgtable(struct scatterlist *sgl, int index)
+void scsi_free_sgtable(struct scsi_cmnd *cmd)
{
+ struct scatterlist *sgl = cmd->request_buffer;
struct scsi_host_sg_pool *sgp;
- BUG_ON(index >= SG_MEMPOOL_NR);
+ /*
+ * if this is the biggest size sglist, check if we have
+ * chained parts we need to free
+ */
+ if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
+ unsigned short this, left;
+ struct scatterlist *next;
+ unsigned int index;
+
+ left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
+ next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
+ while (left && next) {
+ sgl = next;
+ this = left;
+ if (this > SCSI_MAX_SG_SEGMENTS) {
+ this = SCSI_MAX_SG_SEGMENTS - 1;
+ index = SG_MEMPOOL_NR - 1;
+ } else
+ index = scsi_sgtable_index(this);
+
+ left -= this;
+
+ sgp = scsi_sg_pools + index;
+
+ if (left)
+ next = sg_chain_ptr(&sgl[sgp->size - 1]);
+
+ mempool_free(sgl, sgp->pool);
+ }
+
+ /*
+ * Restore original, will be freed below
+ */
+ sgl = cmd->request_buffer;
+ sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+ } else
+ sgp = scsi_sg_pools + scsi_sgtable_index(cmd->__use_sg);
- sgp = scsi_sg_pools + index;
mempool_free(sgl, sgp->pool);
}
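Allocation and free thus perform the same walk: carve use_sg into pieces of at most SCSI_MAX_SG_SEGMENTS - 1 data entries plus one chain slot, serve each piece from the smallest pool that fits, and on teardown read each chain pointer before the piece holding it goes back to the mempool, freeing the head last. A runnable sketch of just that arithmetic, assuming use_sg = 300 (the pool names echo the sgpool slabs; everything else is illustrative):

	#include <stdio.h>

	#define MAX_SEGS 128	/* stands in for SCSI_MAX_SG_SEGMENTS */

	/* smallest pool holding nents entries; mirrors scsi_sgtable_index() */
	static const char *pool_name(int nents)
	{
		static const char *names[] = {
			"sgpool-8", "sgpool-16", "sgpool-32",
			"sgpool-64", "sgpool-128"
		};
		int idx = 0, size = 8;

		while (nents > size) {
			size *= 2;
			idx++;
		}
		return names[idx];
	}

	int main(void)
	{
		int left = 300, piece = 0;	/* use_sg = 300 */

		while (left) {
			int this = left;

			if (this > MAX_SEGS)
				this = MAX_SEGS - 1;	/* last slot feeds the chain */
			left -= this;
			printf("piece %d: %3d data entries%s, %s\n",
			       ++piece, this,
			       left ? " + chain link" : "",
			       left ? "sgpool-128" : pool_name(this));
		}
		return 0;
	}

This prints two full sgpool-128 pieces of 127 data entries each and a 46-entry tail from sgpool-64, which is exactly the sequence scsi_free_sgtable() must retrace; that is why cmd->__use_sg has to preserve the original count even after DMA mapping shrinks cmd->use_sg.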
@@ -763,7 +886,7 @@ EXPORT_SYMBOL(scsi_free_sgtable);
static void scsi_release_buffers(struct scsi_cmnd *cmd)
{
if (cmd->use_sg)
- scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
+ scsi_free_sgtable(cmd);
/*
* Zero these out. They now point to freed memory, and it is
@@ -924,11 +1047,11 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
break;
}
}
- if (!(req->cmd_flags & REQ_QUIET)) {
- scmd_printk(KERN_INFO, cmd,
- "Device not ready: ");
- scsi_print_sense_hdr("", &sshdr);
- }
+ if (!(req->cmd_flags & REQ_QUIET))
+ scsi_cmd_print_sense_hdr(cmd,
+ "Device not ready",
+ &sshdr);
+
scsi_end_request(cmd, 0, this_count, 1);
return;
case VOLUME_OVERFLOW:
@@ -962,7 +1085,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
}
scsi_end_request(cmd, 0, this_count, !result);
}
-EXPORT_SYMBOL(scsi_io_completion);
/*
* Function: scsi_init_io()
@@ -978,7 +1100,6 @@ EXPORT_SYMBOL(scsi_io_completion);
static int scsi_init_io(struct scsi_cmnd *cmd)
{
struct request *req = cmd->request;
- struct scatterlist *sgpnt;
int count;
/*
@@ -991,14 +1112,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
/*
* If sg table allocation fails, requeue request later.
*/
- sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
- if (unlikely(!sgpnt)) {
+ cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
+ if (unlikely(!cmd->request_buffer)) {
scsi_unprep_request(req);
return BLKPREP_DEFER;
}
req->buffer = NULL;
- cmd->request_buffer = (char *) sgpnt;
if (blk_pc_request(req))
cmd->request_bufflen = req->data_len;
else
@@ -1019,9 +1139,6 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
req->current_nr_sectors);
- /* release the command and kill it */
- scsi_release_buffers(cmd);
- scsi_put_command(cmd);
return BLKPREP_KILL;
}
@@ -1046,21 +1163,13 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
return cmd;
}
-static void scsi_blk_pc_done(struct scsi_cmnd *cmd)
-{
- BUG_ON(!blk_pc_request(cmd->request));
- /*
- * This will complete the whole command with uptodate=1 so
- * as far as the block layer is concerned the command completed
- * successfully. Since this is a REQ_BLOCK_PC command the
- * caller should check the request's errors value
- */
- scsi_io_completion(cmd, cmd->request_bufflen);
-}
-
-static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
+int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
{
struct scsi_cmnd *cmd;
+ int ret = scsi_prep_state_check(sdev, req);
+
+ if (ret != BLKPREP_OK)
+ return ret;
cmd = scsi_get_cmd_from_req(sdev, req);
if (unlikely(!cmd))
@@ -1103,21 +1212,22 @@ static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
cmd->transfersize = req->data_len;
cmd->allowed = req->retries;
cmd->timeout_per_command = req->timeout;
- cmd->done = scsi_blk_pc_done;
return BLKPREP_OK;
}
+EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
/*
* Setup a REQ_TYPE_FS command. These are simple read/write request
* from filesystems that still need to be translated to SCSI CDBs from
* the ULD.
*/
-static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
+int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
{
struct scsi_cmnd *cmd;
- struct scsi_driver *drv;
- int ret;
+ int ret = scsi_prep_state_check(sdev, req);
+ if (ret != BLKPREP_OK)
+ return ret;
/*
* Filesystem requests must transfer data.
*/
@@ -1127,26 +1237,12 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
if (unlikely(!cmd))
return BLKPREP_DEFER;
- ret = scsi_init_io(cmd);
- if (unlikely(ret))
- return ret;
-
- /*
- * Initialize the actual SCSI command for this request.
- */
- drv = *(struct scsi_driver **)req->rq_disk->private_data;
- if (unlikely(!drv->init_command(cmd))) {
- scsi_release_buffers(cmd);
- scsi_put_command(cmd);
- return BLKPREP_KILL;
- }
-
- return BLKPREP_OK;
+ return scsi_init_io(cmd);
}
+EXPORT_SYMBOL(scsi_setup_fs_cmnd);
-static int scsi_prep_fn(struct request_queue *q, struct request *req)
+int scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
{
- struct scsi_device *sdev = q->queuedata;
int ret = BLKPREP_OK;
/*
@@ -1192,35 +1288,25 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
ret = BLKPREP_KILL;
break;
}
-
- if (ret != BLKPREP_OK)
- goto out;
}
+ return ret;
+}
+EXPORT_SYMBOL(scsi_prep_state_check);
- switch (req->cmd_type) {
- case REQ_TYPE_BLOCK_PC:
- ret = scsi_setup_blk_pc_cmnd(sdev, req);
- break;
- case REQ_TYPE_FS:
- ret = scsi_setup_fs_cmnd(sdev, req);
- break;
- default:
- /*
- * All other command types are not supported.
- *
- * Note that these days the SCSI subsystem does not use
- * REQ_TYPE_SPECIAL requests anymore. These are only used
- * (directly or via blk_insert_request) by non-SCSI drivers.
- */
- blk_dump_rq_flags(req, "SCSI bad req");
- ret = BLKPREP_KILL;
- break;
- }
+int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
+{
+ struct scsi_device *sdev = q->queuedata;
- out:
switch (ret) {
case BLKPREP_KILL:
req->errors = DID_NO_CONNECT << 16;
+ /* release the command and kill it */
+ if (req->special) {
+ struct scsi_cmnd *cmd = req->special;
+ scsi_release_buffers(cmd);
+ scsi_put_command(cmd);
+ req->special = NULL;
+ }
break;
case BLKPREP_DEFER:
/*
@@ -1237,6 +1323,17 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
return ret;
}
+EXPORT_SYMBOL(scsi_prep_return);
+
+static int scsi_prep_fn(struct request_queue *q, struct request *req)
+{
+ struct scsi_device *sdev = q->queuedata;
+ int ret = BLKPREP_KILL;
+
+ if (req->cmd_type == REQ_TYPE_BLOCK_PC)
+ ret = scsi_setup_blk_pc_cmnd(sdev, req);
+ return scsi_prep_return(q, req, ret);
+}
/*
* scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
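With scsi_prep_state_check(), scsi_setup_blk_pc_cmnd(), scsi_setup_fs_cmnd() and scsi_prep_return() exported, an upper-level driver can own its prep_fn and build the CDB itself instead of going through drv->init_command. A hedged kernel-context sketch of the pattern (uld_build_cdb() and the uld_ names are hypothetical; sd adopts the real version of this elsewhere in the series):

	static int uld_prep_fn(struct request_queue *q, struct request *rq)
	{
		struct scsi_device *sdev = q->queuedata;
		int ret;

		if (rq->cmd_type == REQ_TYPE_FS) {
			ret = scsi_setup_fs_cmnd(sdev, rq);
			if (ret == BLKPREP_OK &&
			    !uld_build_cdb(rq->special))	/* hypothetical CDB builder; rq->special holds the scsi_cmnd */
				ret = BLKPREP_KILL;
		} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
			ret = scsi_setup_blk_pc_cmnd(sdev, rq);
		} else {
			ret = BLKPREP_KILL;
		}

		return scsi_prep_return(q, rq, ret);
	}

The driver registers this with blk_queue_prep_rq(sdev->request_queue, uld_prep_fn); scsi_prep_return() then centralizes the BLKPREP_KILL cleanup that scsi_init_io() used to do itself.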
@@ -1546,8 +1643,25 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
if (!q)
return NULL;
+ /*
+ * this limit is imposed by hardware restrictions
+ */
blk_queue_max_hw_segments(q, shost->sg_tablesize);
- blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
+
+ /*
+ * In the future, sg chaining support will be mandatory and this
+ * ifdef can then go away. Right now not all archs are
+ * converted, so it is safer to keep the fallback.
+ */
+#ifdef ARCH_HAS_SG_CHAIN
+ if (shost->use_sg_chaining)
+ blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+ else
+ blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#else
+ blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#endif
+
blk_queue_max_sectors(q, shost->max_sectors);
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
blk_queue_segment_boundary(q, shost->dma_boundary);
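shost->use_sg_chaining comes from the low-level driver's host template: a driver whose DMA paths have been verified against chained lists opts in there, otherwise it keeps the 128-segment cap even on ARCH_HAS_SG_CHAIN architectures. A hedged sketch, assuming the use_sg_chaining template field and ENABLE_SG_CHAINING constant from this era's scsi_host.h (driver names and limits are illustrative):

	static struct scsi_host_template example_sht = {
		.module			= THIS_MODULE,
		.name			= "example",
		.queuecommand		= example_queuecommand,	/* hypothetical */
		.this_id		= -1,
		.sg_tablesize		= 128,		/* hardware DMA limit */
		.cmd_per_lun		= 1,
		.use_clustering		= ENABLE_CLUSTERING,
		.use_sg_chaining	= ENABLE_SG_CHAINING,	/* opt in to chained sglists */
	};
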
@@ -2210,18 +2324,19 @@ EXPORT_SYMBOL_GPL(scsi_target_unblock);
*
* Returns virtual address of the start of the mapped page
*/
-void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
+void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
size_t *offset, size_t *len)
{
int i;
size_t sg_len = 0, len_complete = 0;
+ struct scatterlist *sg;
struct page *page;
WARN_ON(!irqs_disabled());
- for (i = 0; i < sg_count; i++) {
+ for_each_sg(sgl, sg, sg_count, i) {
len_complete = sg_len; /* Complete sg-entries */
- sg_len += sg[i].length;
+ sg_len += sg->length;
if (sg_len > *offset)
break;
}
@@ -2235,10 +2350,10 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
}
/* Offset starting from the beginning of first page in this sg-entry */
- *offset = *offset - len_complete + sg[i].offset;
+ *offset = *offset - len_complete + sg->offset;
/* Assumption: contiguous pages can be accessed as "page + i" */
- page = nth_page(sg[i].page, (*offset >> PAGE_SHIFT));
+ page = nth_page(sg->page, (*offset >> PAGE_SHIFT));
*offset &= ~PAGE_MASK;
/* Bytes in this sg-entry from *offset to the end of the page */