From 8f3d8ba20e67991b531e9c0227dcd1f99271a32c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 19:55:13 +0200 Subject: block: move bio list helpers into bio.h It's used by DM and MD and generally useful, so move the bio list helpers into bio.h. Signed-off-by: Christoph Hellwig Acked-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- include/linux/bio.h | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index b900d2c67d2..b89cf2d8289 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio) return bio && bio->bi_io_vec != NULL; } +/* + * BIO list managment for use by remapping drivers (e.g. DM or MD). + * + * A bio_list anchors a singly-linked list of bios chained through the bi_next + * member of the bio. The bio_list also caches the last list member to allow + * fast access to the tail. + */ +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -- cgit v1.2.3 From 48e70bc18ac81881dedd3aa327c55b924fc41ecf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Apr 2009 08:19:27 +0200 Subject: Document and move the various READ/WRITE types It's a somewhat twisty maze of hints and behavioural modifiers, try and clear it up a bit with some documentation. Signed-off-by: Jens Axboe --- include/linux/fs.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 562d2855cf3..b535aec4406 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,60 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* + * The below are the various read and write types that we support. Some of + * them include behavioral modifiers that send information down to the + * block layer and IO scheduler. Terminology: + * + * The block layer uses device plugging to defer IO a little bit, in + * the hope that we will see more IO very shortly. This increases + * coalescing of adjacent IO and thus reduces the number of IOs we + * have to send to the device. It also allows for better queuing, + * if the IO isn't mergeable. If the caller is going to be waiting + * for the IO, then he must ensure that the device is unplugged so + * that the IO is dispatched to the driver. + * + * All IO is handled async in Linux. This is fine for background + * writes, but for reads or writes that someone waits for completion + * on, we want to notify the block layer and IO scheduler so that they + * know about it. That allows them to make better scheduling + * decisions. So when the below references 'sync' and 'async', it + * is referencing this priority hint. + * + * With that in mind, the available types are: + * + * READ A normal read operation. Device will be plugged. + * READ_SYNC A synchronous read. Device is not plugged, caller can + * immediately wait on this read without caring about + * unplugging. + * READA Used for read-ahead operations. Lower priority, and the + * block layer could (in theory) choose to ignore this + * request if it runs into resource problems. + * WRITE A normal async write. Device will be plugged. + * SWRITE Like WRITE, but a special case for ll_rw_block() that + * tells it to lock the buffer first. Normally a buffer + * must be locked before doing IO. + * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down + * the hint that someone will be waiting on this IO + * shortly. The device must still be unplugged explicitly, + * WRITE_SYNC_PLUG does not do this as we could be + * submitting more writes before we actually wait on any + * of them. + * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device + * immediately after submission. The write equivalent + * of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. + * SWRITE_SYNC + * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. + * See SWRITE. + * WRITE_BARRIER Like WRITE, but tells the block layer that all + * previously submitted writes must be safely on storage + * before this one is started. Also guarantees that when + * this write is complete, it itself is also safely on + * storage. Prevents reordering of writes on both sides + * of this IO. + * + */ #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 @@ -102,6 +156,11 @@ struct inodes_stat_t { (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) + +/* + * These aren't really reads or writes, they pass down information about + * parts of device that are now unused by the file system. + */ #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) -- cgit v1.2.3 From b3c2d2ddd63944ef2a1e4a43077b602288107e01 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:36 +0200 Subject: splice: split up __splice_from_pipe() Split up __splice_from_pipe() into four helper functions: splice_from_pipe_begin() splice_from_pipe_next() splice_from_pipe_feed() splice_from_pipe_end() splice_from_pipe_next() will wait (if necessary) for more buffers to be added to the pipe. splice_from_pipe_feed() will feed the buffers to the supplied actor and return when there's no more data available (or if all of the requested data has been copied). This is necessary so that implementations can do locking around the non-waiting splice_from_pipe_feed(). This patch should not cause any change in behavior. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/splice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/splice.h b/include/linux/splice.h index 528dcb93c2f..8fc2a635586 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -36,6 +36,8 @@ struct splice_desc { void *data; /* cookie */ } u; loff_t pos; /* file position */ + size_t num_spliced; /* number of bytes already spliced */ + bool need_wakeup; /* need to wake up writer */ }; struct partial_page { @@ -66,6 +68,14 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, splice_actor *); extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct splice_desc *, splice_actor *); +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, + splice_actor *); +extern int splice_from_pipe_next(struct pipe_inode_info *, + struct splice_desc *); +extern void splice_from_pipe_begin(struct splice_desc *); +extern void splice_from_pipe_end(struct pipe_inode_info *, + struct splice_desc *); + extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, -- cgit v1.2.3 From 328eaaba4e41a04c1dc4679d65bea3fee4349d86 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:39 +0200 Subject: ocfs2: fix i_mutex locking in ocfs2_splice_to_file() Rearrange locking of i_mutex on destination and call to ocfs2_rw_lock() so locks are only held while buffers are copied with the pipe_to_file() actor, and not while waiting for more data on the pipe. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/splice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/splice.h b/include/linux/splice.h index 8fc2a635586..5f3faa9d15a 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -75,6 +75,8 @@ extern int splice_from_pipe_next(struct pipe_inode_info *, extern void splice_from_pipe_begin(struct splice_desc *); extern void splice_from_pipe_end(struct pipe_inode_info *, struct splice_desc *); +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); -- cgit v1.2.3 From f8cc774ce4844811a55e2352f1443055e3994e28 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:40 +0200 Subject: splice: remove generic_file_splice_write_nolock() Remove the now unused generic_file_splice_write_nolock() function. It's conceptually broken anyway, because splice may need to wait for pipe events so holding locks across the whole operation is wrong. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b535aec4406..907d8f56c6f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2209,8 +2209,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- cgit v1.2.3 From 61e0d47c33cc371f725bcda4a47ae0efe652dba8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Apr 2009 19:48:41 +0200 Subject: splice: add helpers for locking pipe inode There are lots of sequences like this, especially in splice code: if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); /* do something */ if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); so introduce helpers which do the conditional locking and unlocking. Also replace the inode_double_lock() call with a pipe_double_lock() helper to avoid spreading the use of this functionality beyond the pipe code. This patch is just a cleanup, and should cause no behavioral changes. Signed-off-by: Miklos Szeredi Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- include/linux/pipe_fs_i.h | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 907d8f56c6f..e766be0d432 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -797,9 +797,6 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; -extern void inode_double_lock(struct inode *inode1, struct inode *inode2); -extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); - /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8e4120285f7..c8f038554e8 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -134,6 +134,11 @@ struct pipe_buf_operations { memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE +/* Pipe lock and unlock operations */ +void pipe_lock(struct pipe_inode_info *); +void pipe_unlock(struct pipe_inode_info *); +void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); -- cgit v1.2.3