From 4c54ac62dceecedd82d4a865017bba0b738e2897 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 18 Feb 2008 13:48:31 +0100 Subject: make struct def_blk_aops static This patch makes the needlessly global struct def_blk_aops static. Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 98ffb6ead43..b84b848431f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1590,7 +1590,6 @@ extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); -extern const struct address_space_operations def_blk_aops; #else static inline void bd_forget(struct inode *inode) {} #endif -- cgit v1.2.3 From 86b6c7a7f78feca58d2d8615e53aee4d59ab9dc6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 18 Feb 2008 13:48:32 +0100 Subject: fs/block_dev.c: remove #if 0'ed code Commit b2e895dbd80c420bfc0937c3729b4afe073b3848 #if 0'ed this code stating: <-- snip --> [PATCH] revert blockdev direct io back to 2.6.19 version Andrew Vasquez is reporting as-iosched oopses and a 65% throughput slowdown due to the recent special-casing of direct-io against blockdevs. We don't know why either of these things are occurring. The patch minimally reverts us back to the 2.6.19 code for a 2.6.20 release. <-- snip --> It has since been dead code, and unless someone wants to revive it now it's time to remove it. This patch also makes bio_release_pages() static again and removes the ki_bio_count member from struct kiocb, reverting changes that had been done for this dead code. Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- include/linux/aio.h | 1 - include/linux/bio.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/aio.h b/include/linux/aio.h index a9931e2e562..0d0b7f629bd 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -105,7 +105,6 @@ struct kiocb { wait_queue_t ki_wait; loff_t ki_pos; - atomic_t ki_bio_count; /* num bio used for this iocb */ void *private; /* State that we remember to be able to restart/retry */ unsigned short ki_opcode; diff --git a/include/linux/bio.h b/include/linux/bio.h index 4da441337d6..4c59bdccd3e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -326,7 +326,6 @@ extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern void bio_release_pages(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); -- cgit v1.2.3 From ffc4e7595734cf768fa60cea8a4d545dfef8231a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Feb 2008 10:02:29 +0100 Subject: cfq-iosched: add hlist for browsing parallel to the radix tree It's cumbersome to browse a radix tree from start to finish, especially since we modify keys when a process exits. So add a hlist for the single purpose of browsing over all known cfq_io_contexts, used for exit, io prio change, etc. This fixes http://bugzilla.kernel.org/show_bug.cgi?id=9948 Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 593b222d9dc..1b4ccf25b4d 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -50,6 +50,7 @@ struct cfq_io_context { sector_t seek_mean; struct list_head queue_list; + struct hlist_node cic_list; void (*dtor)(struct io_context *); /* destructor */ void (*exit)(struct io_context *); /* called on task exit */ @@ -77,6 +78,7 @@ struct io_context { struct as_io_context *aic; struct radix_tree_root radix_root; + struct hlist_head cic_list; void *ioc_data; }; -- cgit v1.2.3 From 6b00769fe1502b4ad97bb327ef7ac971b208bfb5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Feb 2008 11:36:35 +0100 Subject: block: add request->raw_data_len With padding and draining moved into it, block layer now may extend requests as directed by queue parameters, so now a request has two sizes - the original request size and the extended size which matches the size of area pointed to by bios and later by sgs. The latter size is what lower layers are primarily interested in when allocating, filling up DMA tables and setting up the controller. Both padding and draining extend the data area to accomodate controller characteristics. As any controller which speaks SCSI can handle underflows, feeding larger data area is safe. So, this patch makes the primary data length field, request->data_len, indicate the size of full data area and add a separate length field, request->raw_data_len, for the unmodified request size. The latter is used to report to higher layer (userland) and where the original request size should be fed to the controller or device. Signed-off-by: Tejun Heo Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e1888cc5b8a..f1fe9fbf1c0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -216,6 +216,7 @@ struct request { unsigned int cmd_len; unsigned char cmd[BLK_MAX_CDB]; + unsigned int raw_data_len; unsigned int data_len; unsigned int sense_len; void *data; -- cgit v1.2.3 From 2fb98e8414c42cb14698833aac640b143b9ade4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Feb 2008 11:36:53 +0100 Subject: block: implement request_queue->dma_drain_needed Draining shouldn't be done for commands where overflow may indicate data integrity issues. Add dma_drain_needed callback to request_queue. Drain buffer is appened iff this function returns non-zero. Signed-off-by: Tejun Heo Cc: James Bottomley Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f1fe9fbf1c0..6fe67d1939c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -259,6 +259,7 @@ struct bio_vec; typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); +typedef int (dma_drain_needed_fn)(struct request *); enum blk_queue_state { Queue_down, @@ -295,6 +296,7 @@ struct request_queue merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; + dma_drain_needed_fn *dma_drain_needed; /* * Dispatch queue sorting @@ -699,8 +701,9 @@ extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -extern int blk_queue_dma_drain(struct request_queue *q, void *buf, - unsigned int size); +extern int blk_queue_dma_drain(struct request_queue *q, + dma_drain_needed_fn *dma_drain_needed, + void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); -- cgit v1.2.3 From dde2020754aeb14e17052d61784dcb37f252aac2 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 19 Feb 2008 11:36:56 +0100 Subject: libata: eliminate the home grown dma padding in favour of that provided by the block layer ATA requires that all DMA transfers begin and end on word boundaries. Because of this, a large amount of machinery grew up in ide to adjust scatterlists on this basis. However, as of 2.5, the block layer has a dma_alignment variable which ensures both the beginning and length of a DMA transfer are aligned on the dma_alignment boundary. Although the block layer does adjust the beginning of the transfer to ensure this happens, it doesn't actually adjust the length, it merely makes sure that space is allocated for transfers beyond the declared length. The upshot of this is that scatterlists may be padded to any size between the actual length and the length adjusted to the dma_alignment safely knowing that memory is allocated in this region. Right at the moment, SCSI takes the default dma_aligment which is on a 512 byte boundary. Note that this aligment only applies to transfers coming in from user space. However, since all kernel allocations are automatically aligned on a minimum of 32 byte boundaries, it is safe to adjust them in this manner as well. tj: * Adjusting sg after padding is done in block layer. Make libata set queue alignment correctly for ATAPI devices and drop broken sg mangling from ata_sg_setup(). * Use request->raw_data_len for ATAPI transfer chunk size. * Killed qc->raw_nbytes. * Separated out killing qc->n_iter. Signed-off-by: James Bottomley Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/libata.h | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index bc5a8d0c709..2e098f940ce 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -278,7 +278,6 @@ enum { /* size of buffer to pad xfers ending on unaligned boundaries */ ATA_DMA_PAD_SZ = 4, - ATA_DMA_PAD_BUF_SZ = ATA_DMA_PAD_SZ * ATA_MAX_QUEUE, /* ering size */ ATA_ERING_SIZE = 32, @@ -457,24 +456,18 @@ struct ata_queued_cmd { unsigned long flags; /* ATA_QCFLAG_xxx */ unsigned int tag; unsigned int n_elem; - unsigned int mapped_n_elem; int dma_dir; - unsigned int pad_len; unsigned int sect_size; unsigned int nbytes; - unsigned int raw_nbytes; unsigned int curbytes; struct scatterlist *cursg; unsigned int cursg_ofs; - struct scatterlist *last_sg; - struct scatterlist saved_last_sg; struct scatterlist sgent; - struct scatterlist extra_sg[2]; struct scatterlist *sg; @@ -619,9 +612,6 @@ struct ata_port { struct ata_prd *prd; /* our SG list */ dma_addr_t prd_dma; /* and its DMA mapping */ - void *pad; /* array of DMA pad buffers */ - dma_addr_t pad_dma; - struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ @@ -1363,12 +1353,9 @@ static inline void ata_qc_reinit(struct ata_queued_cmd *qc) qc->flags = 0; qc->cursg = NULL; qc->cursg_ofs = 0; - qc->nbytes = qc->raw_nbytes = qc->curbytes = 0; + qc->nbytes = qc->curbytes = 0; qc->n_elem = 0; - qc->mapped_n_elem = 0; qc->err_mask = 0; - qc->pad_len = 0; - qc->last_sg = NULL; qc->sect_size = ATA_SECT_SIZE; ata_tf_init(qc->dev, &qc->tf); @@ -1423,19 +1410,6 @@ static inline unsigned int __ac_err_mask(u8 status) return mask; } -static inline int ata_pad_alloc(struct ata_port *ap, struct device *dev) -{ - ap->pad_dma = 0; - ap->pad = dmam_alloc_coherent(dev, ATA_DMA_PAD_BUF_SZ, - &ap->pad_dma, GFP_KERNEL); - return (ap->pad == NULL) ? -ENOMEM : 0; -} - -static inline void ata_pad_free(struct ata_port *ap, struct device *dev) -{ - dmam_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma); -} - static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host) { return *(struct ata_port **)&host->hostdata[0]; -- cgit v1.2.3