diff options
Diffstat (limited to 'fs')
82 files changed, 4298 insertions, 4605 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 7e7a04be127..e647200262a 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -61,10 +61,14 @@ static int adfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, adfs_get_block); } -static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to) +static int adfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, adfs_get_block, - &ADFS_I(page->mapping->host)->mmu_private); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + adfs_get_block, + &ADFS_I(mapping->host)->mmu_private); } static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) @@ -76,8 +80,8 @@ static const struct address_space_operations adfs_aops = { .readpage = adfs_readpage, .writepage = adfs_writepage, .sync_page = block_sync_page, - .prepare_write = adfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = adfs_write_begin, + .write_end = generic_write_end, .bmap = _adfs_bmap }; diff --git a/fs/affs/file.c b/fs/affs/file.c index c314a35f091..6e0c9399200 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -395,25 +395,33 @@ static int affs_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, affs_get_block, wbc); } + static int affs_readpage(struct file *file, struct page *page) { return block_read_full_page(page, affs_get_block); } -static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +static int affs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, affs_get_block, - &AFFS_I(page->mapping->host)->mmu_private); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + affs_get_block, + &AFFS_I(mapping->host)->mmu_private); } + static sector_t _affs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,affs_get_block); } + const struct address_space_operations affs_aops = { .readpage = affs_readpage, .writepage = affs_writepage, .sync_page = block_sync_page, - .prepare_write = affs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = affs_write_begin, + .write_end = generic_write_end, .bmap = _affs_bmap }; @@ -603,54 +611,65 @@ affs_readpage_ofs(struct file *file, struct page *page) return err; } -static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to) +static int affs_write_begin_ofs(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = page->mapping->host; - u32 size, offset; - u32 tmp; + struct inode *inode = mapping->host; + struct page *page; + pgoff_t index; int err = 0; - pr_debug("AFFS: prepare_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); - offset = page->index << PAGE_CACHE_SHIFT; - if (offset + from > AFFS_I(inode)->mmu_private) { - err = affs_extent_file_ofs(inode, offset + from); + pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len); + if (pos > AFFS_I(inode)->mmu_private) { + /* XXX: this probably leaves a too-big i_size in case of + * failure. Should really be updating i_size at write_end time + */ + err = affs_extent_file_ofs(inode, pos); if (err) return err; } - size = inode->i_size; + + index = pos >> PAGE_CACHE_SHIFT; + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; if (PageUptodate(page)) return 0; - if (from) { - err = affs_do_readpage_ofs(file, page, 0, from); - if (err) - return err; - } - if (to < PAGE_CACHE_SIZE) { - zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0); - if (size > offset + to) { - if (size < offset + PAGE_CACHE_SIZE) - tmp = size & ~PAGE_CACHE_MASK; - else - tmp = PAGE_CACHE_SIZE; - err = affs_do_readpage_ofs(file, page, to, tmp); - } + /* XXX: inefficient but safe in the face of short writes */ + err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE); + if (err) { + unlock_page(page); + page_cache_release(page); } return err; } -static int affs_commit_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to) +static int affs_write_end_ofs(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; struct buffer_head *bh, *prev_bh; char *data; u32 bidx, boff, bsize; + unsigned from, to; u32 tmp; int written; - pr_debug("AFFS: commit_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); + from = pos & (PAGE_CACHE_SIZE - 1); + to = pos + len; + /* + * XXX: not sure if this can handle short copies (len < copied), but + * we don't have to, because the page should always be uptodate here, + * due to write_begin. + */ + + pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len); bsize = AFFS_SB(sb)->s_data_blksize; data = page_address(page); @@ -748,6 +767,9 @@ done: if (tmp > inode->i_size) inode->i_size = AFFS_I(inode)->mmu_private = tmp; + unlock_page(page); + page_cache_release(page); + return written; out: @@ -761,8 +783,8 @@ const struct address_space_operations affs_aops_ofs = { .readpage = affs_readpage_ofs, //.writepage = affs_writepage_ofs, //.sync_page = affs_sync_page_ofs, - .prepare_write = affs_prepare_write_ofs, - .commit_write = affs_commit_write_ofs + .write_begin = affs_write_begin_ofs, + .write_end = affs_write_end_ofs }; /* Free any preallocated blocks. */ @@ -805,18 +827,13 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - u32 size = inode->i_size - 1; + void *fsdata; + u32 size = inode->i_size; int res; - page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); - if (!page) - return; - size = (size & (PAGE_CACHE_SIZE - 1)) + 1; - res = mapping->a_ops->prepare_write(NULL, page, size, size); + res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); if (!res) - res = mapping->a_ops->commit_write(NULL, page, size, size); - unlock_page(page); - page_cache_release(page); + res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata); mark_inode_dirty(inode); return; } else if (inode->i_size == AFFS_I(inode)->mmu_private) diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 24310e9ee05..911b4ccf470 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -145,9 +145,13 @@ static int bfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, bfs_get_block); } -static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int bfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, bfs_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, + pagep, fsdata, bfs_get_block); } static sector_t bfs_bmap(struct address_space *mapping, sector_t block) @@ -159,8 +163,8 @@ const struct address_space_operations bfs_aops = { .readpage = bfs_readpage, .writepage = bfs_writepage, .sync_page = block_sync_page, - .prepare_write = bfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = bfs_write_begin, + .write_end = generic_write_end, .bmap = bfs_bmap, }; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b1013f34085..f3037c645ca 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1725,7 +1725,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) &page, &vma) <= 0) { DUMP_SEEK(PAGE_SIZE); } else { - if (page == ZERO_PAGE(addr)) { + if (page == ZERO_PAGE(0)) { if (!dump_seek(file, PAGE_SIZE)) { page_cache_release(page); goto end_coredump; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2f5d8dbe676..c5ca2f0aca7 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1488,7 +1488,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, &page, &vma) <= 0) { DUMP_SEEK(file->f_pos + PAGE_SIZE); } - else if (page == ZERO_PAGE(addr)) { + else if (page == ZERO_PAGE(0)) { page_cache_release(page); DUMP_SEEK(file->f_pos + PAGE_SIZE); } @@ -109,11 +109,14 @@ static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned lon void bio_free(struct bio *bio, struct bio_set *bio_set) { - const int pool_idx = BIO_POOL_IDX(bio); + if (bio->bi_io_vec) { + const int pool_idx = BIO_POOL_IDX(bio); - BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); + BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); + + mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); + } - mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); mempool_free(bio, bio_set->bio_pool); } @@ -127,21 +130,9 @@ static void bio_fs_destructor(struct bio *bio) void bio_init(struct bio *bio) { - bio->bi_next = NULL; - bio->bi_bdev = NULL; + memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; - bio->bi_rw = 0; - bio->bi_vcnt = 0; - bio->bi_idx = 0; - bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; - bio->bi_hw_front_size = 0; - bio->bi_hw_back_size = 0; - bio->bi_size = 0; - bio->bi_max_vecs = 0; - bio->bi_end_io = NULL; atomic_set(&bio->bi_cnt, 1); - bio->bi_private = NULL; } /** diff --git a/fs/block_dev.c b/fs/block_dev.c index 6339a30879b..379a446e243 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -378,14 +378,26 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } -static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, blkdev_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + blkdev_get_block); } -static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - return block_commit_write(page, from, to); + int ret; + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + page_cache_release(page); + + return ret; } /* @@ -1327,8 +1339,8 @@ const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, .sync_page = block_sync_page, - .prepare_write = blkdev_prepare_write, - .commit_write = blkdev_commit_write, + .write_begin = blkdev_write_begin, + .write_end = blkdev_write_end, .writepages = generic_writepages, .direct_IO = blkdev_direct_IO, }; diff --git a/fs/buffer.c b/fs/buffer.c index 75b51dfa5e0..faceb5eecca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -110,10 +110,14 @@ static void buffer_io_error(struct buffer_head *bh) } /* - * Default synchronous end-of-IO handler.. Just mark it up-to-date and - * unlock the buffer. This is what ll_rw_block uses too. + * End-of-IO handler helper function which does not touch the bh after + * unlocking it. + * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but + * a race there is benign: unlock_buffer() only use the bh's address for + * hashing after unlocking the buffer, so it doesn't actually touch the bh + * itself. */ -void end_buffer_read_sync(struct buffer_head *bh, int uptodate) +static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); @@ -122,6 +126,15 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) clear_buffer_uptodate(bh); } unlock_buffer(bh); +} + +/* + * Default synchronous end-of-IO handler.. Just mark it up-to-date and + * unlock the buffer. This is what ll_rw_block uses too. + */ +void end_buffer_read_sync(struct buffer_head *bh, int uptodate) +{ + __end_buffer_read_notouch(bh, uptodate); put_bh(bh); } @@ -1757,6 +1770,48 @@ recover: goto done; } +/* + * If a page has any new buffers, zero them out here, and mark them uptodate + * and dirty so they'll be written out (in order to prevent uninitialised + * block data from leaking). And clear the new bit. + */ +void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) +{ + unsigned int block_start, block_end; + struct buffer_head *head, *bh; + + BUG_ON(!PageLocked(page)); + if (!page_has_buffers(page)) + return; + + bh = head = page_buffers(page); + block_start = 0; + do { + block_end = block_start + bh->b_size; + + if (buffer_new(bh)) { + if (block_end > from && block_start < to) { + if (!PageUptodate(page)) { + unsigned start, size; + + start = max(from, block_start); + size = min(to, block_end) - start; + + zero_user_page(page, start, size, KM_USER0); + set_buffer_uptodate(bh); + } + + clear_buffer_new(bh); + mark_buffer_dirty(bh); + } + } + + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); +} +EXPORT_SYMBOL(page_zero_new_buffers); + static int __block_prepare_write(struct inode *inode, struct page *page, unsigned from, unsigned to, get_block_t *get_block) { @@ -1800,7 +1855,9 @@ static int __block_prepare_write(struct inode *inode, struct page *page, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); if (PageUptodate(page)) { + clear_buffer_new(bh); set_buffer_uptodate(bh); + mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) { @@ -1839,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (!buffer_uptodate(*wait_bh)) err = -EIO; } - if (!err) { - bh = head; - do { - if (buffer_new(bh)) - clear_buffer_new(bh); - } while ((bh = bh->b_this_page) != head); - return 0; - } - /* Error case: */ - /* - * Zero out any newly allocated blocks to avoid exposing stale - * data. If BH_New is set, we know that the block was newly - * allocated in the above loop. - */ - bh = head; - block_start = 0; - do { - block_end = block_start+blocksize; - if (block_end <= from) - goto next_bh; - if (block_start >= to) - break; - if (buffer_new(bh)) { - clear_buffer_new(bh); - zero_user_page(page, block_start, bh->b_size, KM_USER0); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - } -next_bh: - block_start = block_end; - bh = bh->b_this_page; - } while (bh != head); + if (unlikely(err)) + page_zero_new_buffers(page, from, to); return err; } @@ -1895,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page, set_buffer_uptodate(bh); mark_buffer_dirty(bh); } + clear_buffer_new(bh); } /* @@ -1909,6 +1937,130 @@ static int __block_commit_write(struct inode *inode, struct page *page, } /* + * block_write_begin takes care of the basic task of block allocation and + * bringing partial write blocks uptodate first. + * + * If *pagep is not NULL, then block_write_begin uses the locked page + * at *pagep rather than allocating its own. In this case, the page will + * not be unlocked or deallocated on failure. + */ +int block_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block) +{ + struct inode *inode = mapping->host; + int status = 0; + struct page *page; + pgoff_t index; + unsigned start, end; + int ownpage = 0; + + index = pos >> PAGE_CACHE_SHIFT; + start = pos & (PAGE_CACHE_SIZE - 1); + end = start + len; + + page = *pagep; + if (page == NULL) { + ownpage = 1; + page = __grab_cache_page(mapping, index); + if (!page) { + status = -ENOMEM; + goto out; + } + *pagep = page; + } else + BUG_ON(!PageLocked(page)); + + status = __block_prepare_write(inode, page, start, end, get_block); + if (unlikely(status)) { + ClearPageUptodate(page); + + if (ownpage) { + unlock_page(page); + page_cache_release(page); + *pagep = NULL; + + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + */ + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); + } + goto out; + } + +out: + return status; +} +EXPORT_SYMBOL(block_write_begin); + +int block_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + unsigned start; + + start = pos & (PAGE_CACHE_SIZE - 1); + + if (unlikely(copied < len)) { + /* + * The buffers that were written will now be uptodate, so we + * don't have to worry about a readpage reading them and + * overwriting a partial write. However if we have encountered + * a short write and only partially written into a buffer, it + * will not be marked uptodate, so a readpage might come in and + * destroy our partial write. + * + * Do the simplest thing, and just treat any short write to a + * non uptodate page as a zero-length write, and force the + * caller to redo the whole thing. + */ + if (!PageUptodate(page)) + copied = 0; + + page_zero_new_buffers(page, start+copied, start+len); + } + flush_dcache_page(page); + + /* This could be a short (even 0-length) commit */ + __block_commit_write(inode, page, start, start+copied); + + return copied; +} +EXPORT_SYMBOL(block_write_end); + +int generic_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + /* + * No need to use i_size_read() here, the i_size + * cannot change under us because we hold i_mutex. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. + */ + if (pos+copied > inode->i_size) { + i_size_write(inode, pos+copied); + mark_inode_dirty(inode); + } + + unlock_page(page); + page_cache_release(page); + + return copied; +} +EXPORT_SYMBOL(generic_write_end); + +/* * Generic "read page" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the page asynchronously --- the unlock_buffer() and @@ -2004,14 +2156,14 @@ int block_read_full_page(struct page *page, get_block_t *get_block) } /* utility function for filesystems that need to do work on expanding - * truncates. Uses prepare/commit_write to allow the filesystem to + * truncates. Uses filesystem pagecache writes to allow the filesystem to * deal with the hole. */ -static int __generic_cont_expand(struct inode *inode, loff_t size, - pgoff_t index, unsigned int offset) +int generic_cont_expand_simple(struct inode *inode, loff_t size) { struct address_space *mapping = inode->i_mapping; struct page *page; + void *fsdata; unsigned long limit; int err; @@ -2024,140 +2176,115 @@ static int __generic_cont_expand(struct inode *inode, loff_t size, if (size > inode->i_sb->s_maxbytes) goto out; - err = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - goto out; - err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (err) { - /* - * ->prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. - */ - unlock_page(page); - page_cache_release(page); - vmtruncate(inode, inode->i_size); + err = pagecache_write_begin(NULL, mapping, size, 0, + AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND, + &page, &fsdata); + if (err) goto out; - } - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); + BUG_ON(err > 0); - unlock_page(page); - page_cache_release(page); - if (err > 0) - err = 0; out: return err; } -int generic_cont_expand(struct inode *inode, loff_t size) +int cont_expand_zero(struct file *file, struct address_space *mapping, + loff_t pos, loff_t *bytes) { - pgoff_t index; - unsigned int offset; + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + struct page *page; + void *fsdata; + pgoff_t index, curidx; + loff_t curpos; + unsigned zerofrom, offset, len; + int err = 0; - offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ + index = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_CACHE_MASK; - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - /* caller must handle this extra byte. */ - offset++; - } - index = size >> PAGE_CACHE_SHIFT; + while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) { + zerofrom = curpos & ~PAGE_CACHE_MASK; + if (zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; + } + len = PAGE_CACHE_SIZE - zerofrom; - return __generic_cont_expand(inode, size, index, offset); -} + err = pagecache_write_begin(file, mapping, curpos, len, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + goto out; + zero_user_page(page, zerofrom, len, KM_USER0); + err = pagecache_write_end(file, mapping, curpos, len, len, + page, fsdata); + if (err < 0) + goto out; + BUG_ON(err != len); + err = 0; + } -int generic_cont_expand_simple(struct inode *inode, loff_t size) -{ - loff_t pos = size - 1; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; + /* page covers the boundary, find the boundary offset */ + if (index == curidx) { + zerofrom = curpos & ~PAGE_CACHE_MASK; + /* if we will expand the thing last block will be filled */ + if (offset <= zerofrom) { + goto out; + } + if (zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; + } + len = offset - zerofrom; - /* prepare/commit_write can handle even if from==to==start of block. */ - return __generic_cont_expand(inode, size, index, offset); + err = pagecache_write_begin(file, mapping, curpos, len, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + goto out; + zero_user_page(page, zerofrom, len, KM_USER0); + err = pagecache_write_end(file, mapping, curpos, len, len, + page, fsdata); + if (err < 0) + goto out; + BUG_ON(err != len); + err = 0; + } +out: + return err; } /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. */ - -int cont_prepare_write(struct page *page, unsigned offset, - unsigned to, get_block_t *get_block, loff_t *bytes) +int cont_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block, loff_t *bytes) { - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct page *new_page; - pgoff_t pgpos; - long status; - unsigned zerofrom; unsigned blocksize = 1 << inode->i_blkbits; + unsigned zerofrom; + int err; - while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { - status = -ENOMEM; - new_page = grab_cache_page(mapping, pgpos); - if (!new_page) - goto out; - /* we might sleep */ - if (*bytes>>PAGE_CACHE_SHIFT != pgpos) { - unlock_page(new_page); - page_cache_release(new_page); - continue; - } - zerofrom = *bytes & ~PAGE_CACHE_MASK; - if (zerofrom & (blocksize-1)) { - *bytes |= (blocksize-1); - (*bytes)++; - } - status = __block_prepare_write(inode, new_page, zerofrom, - PAGE_CACHE_SIZE, get_block); - if (status) - goto out_unmap; - zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom, - KM_USER0); - generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); - unlock_page(new_page); - page_cache_release(new_page); - } - - if (page->index < pgpos) { - /* completely inside the area */ - zerofrom = offset; - } else { - /* page covers the boundary, find the boundary offset */ - zerofrom = *bytes & ~PAGE_CACHE_MASK; - - /* if we will expand the thing last block will be filled */ - if (to > zerofrom && (zerofrom & (blocksize-1))) { - *bytes |= (blocksize-1); - (*bytes)++; - } + err = cont_expand_zero(file, mapping, pos, bytes); + if (err) + goto out; - /* starting below the boundary? Nothing to zero out */ - if (offset <= zerofrom) - zerofrom = offset; - } - status = __block_prepare_write(inode, page, zerofrom, to, get_block); - if (status) - goto out1; - if (zerofrom < offset) { - zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0); - __block_commit_write(inode, page, zerofrom, offset); + zerofrom = *bytes & ~PAGE_CACHE_MASK; + if (pos+len > *bytes && zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; } - return 0; -out1: - ClearPageUptodate(page); - return status; -out_unmap: - ClearPageUptodate(new_page); - unlock_page(new_page); - page_cache_release(new_page); + *pagep = NULL; + err = block_write_begin(file, mapping, pos, len, + flags, pagep, fsdata, get_block); out: - return status; + return err; } int block_prepare_write(struct page *page, unsigned from, unsigned to, @@ -2242,81 +2369,129 @@ out_unlock: } /* - * nobh_prepare_write()'s prereads are special: the buffer_heads are freed + * nobh_write_begin()'s prereads are special: the buffer_heads are freed * immediately, while under the page lock. So it needs a special end_io * handler which does not touch the bh after unlocking it. - * - * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but - * a race there is benign: unlock_buffer() only use the bh's address for - * hashing after unlocking the buffer, so it doesn't actually touch the bh - * itself. */ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) { - if (uptodate) { - set_buffer_uptodate(bh); - } else { - /* This happens, due to failed READA attempts. */ - clear_buffer_uptodate(bh); - } - unlock_buffer(bh); + __end_buffer_read_notouch(bh, uptodate); +} + +/* + * Attach the singly-linked list of buffers created by nobh_write_begin, to + * the page (converting it to circular linked list and taking care of page + * dirty races). + */ +static void attach_nobh_buffers(struct page *page, struct buffer_head *head) +{ + struct buffer_head *bh; + + BUG_ON(!PageLocked(page)); + + spin_lock(&page->mapping->private_lock); + bh = head; + do { + if (PageDirty(page)) + set_buffer_dirty(bh); + if (!bh->b_this_page) + bh->b_this_page = head; + bh = bh->b_this_page; + } while (bh != head); + attach_page_buffers(page, head); + spin_unlock(&page->mapping->private_lock); } /* * On entry, the page is fully not uptodate. * On exit the page is fully uptodate in the areas outside (from,to) */ -int nobh_prepare_write(struct page *page, unsigned from, unsigned to, +int nobh_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, get_block_t *get_block) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocksize = 1 << blkbits; - struct buffer_head map_bh; - struct buffer_head *read_bh[MAX_BUF_PER_PAGE]; + struct buffer_head *head, *bh; + struct page *page; + pgoff_t index; + unsigned from, to; unsigned block_in_page; - unsigned block_start; + unsigned block_start, block_end; sector_t block_in_file; char *kaddr; int nr_reads = 0; - int i; int ret = 0; int is_mapped_to_disk = 1; + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + *fsdata = NULL; + + if (page_has_buffers(page)) { + unlock_page(page); + page_cache_release(page); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, get_block); + } + if (PageMappedToDisk(page)) return 0; + /* + * Allocate buffers so that we can keep track of state, and potentially + * attach them to the page if an error occurs. In the common case of + * no error, they will just be freed again without ever being attached + * to the page (which is all OK, because we're under the page lock). + * + * Be careful: the buffer linked list is a NULL terminated one, rather + * than the circular one we're used to. + */ + head = alloc_page_buffers(page, blocksize, 0); + if (!head) { + ret = -ENOMEM; + goto out_release; + } + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); - map_bh.b_page = page; /* * We loop across all blocks in the page, whether or not they are * part of the affected region. This is so we can discover if the * page is fully mapped-to-disk. */ - for (block_start = 0, block_in_page = 0; + for (block_start = 0, block_in_page = 0, bh = head; block_start < PAGE_CACHE_SIZE; - block_in_page++, block_start += blocksize) { - unsigned block_end = block_start + blocksize; + block_in_page++, block_start += blocksize, bh = bh->b_this_page) { int create; - map_bh.b_state = 0; + block_end = block_start + blocksize; + bh->b_state = 0; create = 1; if (block_start >= to) create = 0; - map_bh.b_size = blocksize; ret = get_block(inode, block_in_file + block_in_page, - &map_bh, create); + bh, create); if (ret) goto failed; - if (!buffer_mapped(&map_bh)) + if (!buffer_mapped(bh)) is_mapped_to_disk = 0; - if (buffer_new(&map_bh)) - unmap_underlying_metadata(map_bh.b_bdev, - map_bh.b_blocknr); - if (PageUptodate(page)) + if (buffer_new(bh)) + unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + if (PageUptodate(page)) { + set_buffer_uptodate(bh); continue; - if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) { + } + if (buffer_new(bh) || !buffer_mapped(bh)) { kaddr = kmap_atomic(page, KM_USER0); if (block_start < from) memset(kaddr+block_start, 0, from-block_start); @@ -2326,49 +2501,26 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, kunmap_atomic(kaddr, KM_USER0); continue; } - if (buffer_uptodate(&map_bh)) + if (buffer_uptodate(bh)) continue; /* reiserfs does this */ if (block_start < from || block_end > to) { - struct buffer_head *bh = alloc_buffer_head(GFP_NOFS); - - if (!bh) { - ret = -ENOMEM; - goto failed; - } - bh->b_state = map_bh.b_state; - atomic_set(&bh->b_count, 0); - bh->b_this_page = NULL; - bh->b_page = page; - bh->b_blocknr = map_bh.b_blocknr; - bh->b_size = blocksize; - bh->b_data = (char *)(long)block_start; - bh->b_bdev = map_bh.b_bdev; - bh->b_private = NULL; - read_bh[nr_reads++] = bh; + lock_buffer(bh); + bh->b_end_io = end_buffer_read_nobh; + submit_bh(READ, bh); + nr_reads++; } } if (nr_reads) { - struct buffer_head *bh; - /* * The page is locked, so these buffers are protected from * any VM or truncate activity. Hence we don't need to care * for the buffer_head refcounts. */ - for (i = 0; i < nr_reads; i++) { - bh = read_bh[i]; - lock_buffer(bh); - bh->b_end_io = end_buffer_read_nobh; - submit_bh(READ, bh); - } - for (i = 0; i < nr_reads; i++) { - bh = read_bh[i]; + for (bh = head; bh; bh = bh->b_this_page) { wait_on_buffer(bh); if (!buffer_uptodate(bh)) ret = -EIO; - free_buffer_head(bh); - read_bh[i] = NULL; } if (ret) goto failed; @@ -2377,44 +2529,70 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, if (is_mapped_to_disk) SetPageMappedToDisk(page); + *fsdata = head; /* to be released by nobh_write_end */ + return 0; failed: - for (i = 0; i < nr_reads; i++) { - if (read_bh[i]) - free_buffer_head(read_bh[i]); - } - + BUG_ON(!ret); /* - * Error recovery is pretty slack. Clear the page and mark it dirty - * so we'll later zero out any blocks which _were_ allocated. + * Error recovery is a bit difficult. We need to zero out blocks that + * were newly allocated, and dirty them to ensure they get written out. + * Buffers need to be attached to the page at this point, otherwise + * the handling of potential IO errors during writeout would be hard + * (could try doing synchronous writeout, but what if that fails too?) */ - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); - SetPageUptodate(page); - set_page_dirty(page); + attach_nobh_buffers(page, head); + page_zero_new_buffers(page, from, to); + +out_release: + unlock_page(page); + page_cache_release(page); + *pagep = NULL; + + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); + return ret; } -EXPORT_SYMBOL(nobh_prepare_write); +EXPORT_SYMBOL(nobh_write_begin); -/* - * Make sure any changes to nobh_commit_write() are reflected in - * nobh_truncate_page(), since it doesn't call commit_write(). - */ -int nobh_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +int nobh_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + struct buffer_head *head = NULL; + struct buffer_head *bh; + + if (!PageMappedToDisk(page)) { + if (unlikely(copied < len) && !page_has_buffers(page)) + attach_nobh_buffers(page, head); + if (page_has_buffers(page)) + return generic_write_end(file, mapping, pos, len, + copied, page, fsdata); + } SetPageUptodate(page); set_page_dirty(page); - if (pos > inode->i_size) { - i_size_write(inode, pos); + if (pos+copied > inode->i_size) { + i_size_write(inode, pos+copied); mark_inode_dirty(inode); } - return 0; + + unlock_page(page); + page_cache_release(page); + + head = fsdata; + while (head) { + bh = head; + head = head->b_this_page; + free_buffer_head(bh); + } + + return copied; } -EXPORT_SYMBOL(nobh_commit_write); +EXPORT_SYMBOL(nobh_write_end); /* * nobh_writepage() - based on block_full_write_page() except @@ -2467,44 +2645,79 @@ out: } EXPORT_SYMBOL(nobh_writepage); -/* - * This function assumes that ->prepare_write() uses nobh_prepare_write(). - */ -int nobh_truncate_page(struct address_space *mapping, loff_t from) +int nobh_truncate_page(struct address_space *mapping, + loff_t from, get_block_t *get_block) { - struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); - unsigned to; + unsigned blocksize; + sector_t iblock; + unsigned length, pos; + struct inode *inode = mapping->host; struct page *page; - const struct address_space_operations *a_ops = mapping->a_ops; - int ret = 0; + struct buffer_head map_bh; + int err; - if ((offset & (blocksize - 1)) == 0) - goto out; + blocksize = 1 << inode->i_blkbits; + length = offset & (blocksize - 1); + + /* Block boundary? Nothing to do */ + if (!length) + return 0; + + length = blocksize - length; + iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ret = -ENOMEM; page = grab_cache_page(mapping, index); + err = -ENOMEM; if (!page) goto out; - to = (offset + blocksize) & ~(blocksize - 1); - ret = a_ops->prepare_write(NULL, page, offset, to); - if (ret == 0) { - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, - KM_USER0); - /* - * It would be more correct to call aops->commit_write() - * here, but this is more efficient. - */ - SetPageUptodate(page); - set_page_dirty(page); + if (page_has_buffers(page)) { +has_buffers: + unlock_page(page); + page_cache_release(page); + return block_truncate_page(mapping, from, get_block); } + + /* Find the buffer that contains "offset" */ + pos = blocksize; + while (offset >= pos) { + iblock++; + pos += blocksize; + } + + err = get_block(inode, iblock, &map_bh, 0); + if (err) + goto unlock; + /* unmapped? It's a hole - nothing to do */ + if (!buffer_mapped(&map_bh)) + goto unlock; + + /* Ok, it's mapped. Make sure it's up-to-date */ + if (!PageUptodate(page)) { + err = mapping->a_ops->readpage(NULL, page); + if (err) { + page_cache_release(page); + goto out; + } + lock_page(page); + if (!PageUptodate(page)) { + err = -EIO; + goto unlock; + } + if (page_has_buffers(page)) + goto has_buffers; + } + zero_user_page(page, offset, length, KM_USER0); + set_page_dirty(page); + err = 0; + +unlock: unlock_page(page); page_cache_release(page); out: - return ret; + return err; } EXPORT_SYMBOL(nobh_truncate_page); @@ -2956,7 +3169,8 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); + struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, + set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); get_cpu_var(bh_accounting).nr++; @@ -3024,14 +3238,13 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); -EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_commit_write); -EXPORT_SYMBOL(generic_cont_expand); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index ddc003a9d21..dbd257d956c 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -41,8 +41,8 @@ extern struct super_block * configfs_sb; static const struct address_space_operations configfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write + .write_begin = simple_write_begin, + .write_end = simple_write_end, }; static struct backing_dev_info configfs_backing_dev_info = { diff --git a/fs/dcache.c b/fs/dcache.c index 678d39deb60..7da0cf50873 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -903,7 +903,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry; char *dname; - dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); + dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) return NULL; diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index a9b99c0dc2e..fa6b7f7ff91 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -227,15 +227,24 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n" DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"); -/** - * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value - * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value - * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value +/* + * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value * - * These functions are exactly the same as the above functions, (but use a hex - * output for the decimal challenged) for details look at the above unsigned + * These functions are exactly the same as the above functions (but use a hex + * output for the decimal challenged). For details look at the above unsigned * decimal functions. */ + +/** + * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + */ struct dentry *debugfs_create_x8(const char *name, mode_t mode, struct dentry *parent, u8 *value) { @@ -243,6 +252,16 @@ struct dentry *debugfs_create_x8(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_x8); +/** + * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + */ struct dentry *debugfs_create_x16(const char *name, mode_t mode, struct dentry *parent, u16 *value) { @@ -250,6 +269,16 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_x16); +/** + * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + */ struct dentry *debugfs_create_x32(const char *name, mode_t mode, struct dentry *parent, u32 *value) { diff --git a/fs/direct-io.c b/fs/direct-io.c index b5928a7b6a5..acf0da1bd25 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -163,7 +163,7 @@ static int dio_refill_pages(struct dio *dio) up_read(¤t->mm->mmap_sem); if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { - struct page *page = ZERO_PAGE(dio->curr_user_address); + struct page *page = ZERO_PAGE(0); /* * A memory fault, but the filesystem has some outstanding * mapped blocks. We need to use those blocks up to avoid @@ -763,7 +763,7 @@ static void dio_zero_block(struct dio *dio, int end) this_chunk_bytes = this_chunk_blocks << dio->blkbits; - page = ZERO_PAGE(dio->curr_user_address); + page = ZERO_PAGE(0); if (submit_page_section(dio, page, 0, this_chunk_bytes, dio->next_block_for_io)) return; diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 1f1107237ea..76885701551 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o messaging.o netlink.o debug.o +ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 6ac630625b7..1ae90ef2c74 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -123,9 +123,9 @@ out: return rc; } -int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, - char *cipher_name, - char *chaining_modifier) +static int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, + char *cipher_name, + char *chaining_modifier) { int cipher_name_len = strlen(cipher_name); int chaining_modifier_len = strlen(chaining_modifier); @@ -149,7 +149,7 @@ out: * ecryptfs_derive_iv * @iv: destination for the derived iv vale * @crypt_stat: Pointer to crypt_stat struct for the current inode - * @offset: Offset of the page whose's iv we are to derive + * @offset: Offset of the extent whose IV we are to derive * * Generate the initialization vector from the given root IV and page * offset. @@ -157,7 +157,7 @@ out: * Returns zero on success; non-zero on error. */ static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, - pgoff_t offset) + loff_t offset) { int rc = 0; char dst[MD5_DIGEST_SIZE]; @@ -173,7 +173,7 @@ static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, * hashing business. -Halcrow */ memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes); memset((src + crypt_stat->iv_bytes), 0, 16); - snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset); + snprintf((src + crypt_stat->iv_bytes), 16, "%lld", offset); if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "source:\n"); ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16)); @@ -204,6 +204,8 @@ void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) { memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); + INIT_LIST_HEAD(&crypt_stat->keysig_list); + mutex_init(&crypt_stat->keysig_list_mutex); mutex_init(&crypt_stat->cs_mutex); mutex_init(&crypt_stat->cs_tfm_mutex); mutex_init(&crypt_stat->cs_hash_tfm_mutex); @@ -211,27 +213,48 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) } /** - * ecryptfs_destruct_crypt_stat + * ecryptfs_destroy_crypt_stat * @crypt_stat: Pointer to the crypt_stat struct to initialize. * * Releases all memory associated with a crypt_stat struct. */ -void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) +void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) { + struct ecryptfs_key_sig *key_sig, *key_sig_tmp; + if (crypt_stat->tfm) crypto_free_blkcipher(crypt_stat->tfm); if (crypt_stat->hash_tfm) crypto_free_hash(crypt_stat->hash_tfm); + mutex_lock(&crypt_stat->keysig_list_mutex); + list_for_each_entry_safe(key_sig, key_sig_tmp, + &crypt_stat->keysig_list, crypt_stat_list) { + list_del(&key_sig->crypt_stat_list); + kmem_cache_free(ecryptfs_key_sig_cache, key_sig); + } + mutex_unlock(&crypt_stat->keysig_list_mutex); memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); } -void ecryptfs_destruct_mount_crypt_stat( +void ecryptfs_destroy_mount_crypt_stat( struct ecryptfs_mount_crypt_stat *mount_crypt_stat) { - if (mount_crypt_stat->global_auth_tok_key) - key_put(mount_crypt_stat->global_auth_tok_key); - if (mount_crypt_stat->global_key_tfm) - crypto_free_blkcipher(mount_crypt_stat->global_key_tfm); + struct ecryptfs_global_auth_tok *auth_tok, *auth_tok_tmp; + + if (!(mount_crypt_stat->flags & ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED)) + return; + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry_safe(auth_tok, auth_tok_tmp, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + list_del(&auth_tok->mount_crypt_stat_list); + mount_crypt_stat->num_global_auth_toks--; + if (auth_tok->global_auth_tok_key + && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID)) + key_put(auth_tok->global_auth_tok_key); + kmem_cache_free(ecryptfs_global_auth_tok_cache, auth_tok); + } + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat)); } @@ -330,114 +353,82 @@ out: return rc; } -static void -ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx, - int *byte_offset, - struct ecryptfs_crypt_stat *crypt_stat, - unsigned long extent_num) +/** + * ecryptfs_lower_offset_for_extent + * + * Convert an eCryptfs page index into a lower byte offset + */ +void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, + struct ecryptfs_crypt_stat *crypt_stat) { - unsigned long lower_extent_num; - int extents_occupied_by_headers_at_front; - int bytes_occupied_by_headers_at_front; - int extent_offset; - int extents_per_page; - - bytes_occupied_by_headers_at_front = - ( crypt_stat->header_extent_size - * crypt_stat->num_header_extents_at_front ); - extents_occupied_by_headers_at_front = - ( bytes_occupied_by_headers_at_front - / crypt_stat->extent_size ); - lower_extent_num = extents_occupied_by_headers_at_front + extent_num; - extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; - (*lower_page_idx) = lower_extent_num / extents_per_page; - extent_offset = lower_extent_num % extents_per_page; - (*byte_offset) = extent_offset * crypt_stat->extent_size; - ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = " - "[%d]\n", crypt_stat->header_extent_size); - ecryptfs_printk(KERN_DEBUG, " * crypt_stat->" - "num_header_extents_at_front = [%d]\n", - crypt_stat->num_header_extents_at_front); - ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_" - "front = [%d]\n", extents_occupied_by_headers_at_front); - ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n", - lower_extent_num); - ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n", - extents_per_page); - ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n", - (*lower_page_idx)); - ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n", - extent_offset); - ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n", - (*byte_offset)); + (*offset) = ((crypt_stat->extent_size + * crypt_stat->num_header_extents_at_front) + + (crypt_stat->extent_size * extent_num)); } -static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx, - struct page *lower_page, - struct inode *lower_inode, - int byte_offset_in_page, int bytes_to_write) +/** + * ecryptfs_encrypt_extent + * @enc_extent_page: Allocated page into which to encrypt the data in + * @page + * @crypt_stat: crypt_stat containing cryptographic context for the + * encryption operation + * @page: Page containing plaintext data extent to encrypt + * @extent_offset: Page extent offset for use in generating IV + * + * Encrypts one extent of data. + * + * Return zero on success; non-zero otherwise + */ +static int ecryptfs_encrypt_extent(struct page *enc_extent_page, + struct ecryptfs_crypt_stat *crypt_stat, + struct page *page, + unsigned long extent_offset) { - int rc = 0; + loff_t extent_base; + char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + int rc; - if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { - rc = ecryptfs_commit_lower_page(lower_page, lower_inode, - ctx->param.lower_file, - byte_offset_in_page, - bytes_to_write); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error calling lower " - "commit; rc = [%d]\n", rc); - goto out; - } - } else { - rc = ecryptfs_writepage_and_release_lower_page(lower_page, - lower_inode, - ctx->param.wbc); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error calling lower " - "writepage(); rc = [%d]\n", rc); - goto out; - } + extent_base = (((loff_t)page->index) + * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); + rc = ecryptfs_derive_iv(extent_iv, crypt_stat, + (extent_base + extent_offset)); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting to " + "derive IV for extent [0x%.16x]; " + "rc = [%d]\n", (extent_base + extent_offset), + rc); + goto out; } -out: - return rc; -} - -static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx, - struct page **lower_page, - struct inode *lower_inode, - unsigned long lower_page_idx, - int byte_offset_in_page) -{ - int rc = 0; - - if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) { - /* TODO: Limit this to only the data extents that are - * needed */ - rc = ecryptfs_get_lower_page(lower_page, lower_inode, - ctx->param.lower_file, - lower_page_idx, - byte_offset_in_page, - (PAGE_CACHE_SIZE - - byte_offset_in_page)); - if (rc) { - ecryptfs_printk( - KERN_ERR, "Error attempting to grab, map, " - "and prepare_write lower page with index " - "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc); - goto out; - } - } else { - *lower_page = grab_cache_page(lower_inode->i_mapping, - lower_page_idx); - if (!(*lower_page)) { - rc = -EINVAL; - ecryptfs_printk( - KERN_ERR, "Error attempting to grab and map " - "lower page with index [0x%.16x]; rc = [%d]\n", - lower_page_idx, rc); - goto out; - } + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Encrypting extent " + "with iv:\n"); + ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " + "encryption:\n"); + ecryptfs_dump_hex((char *) + (page_address(page) + + (extent_offset * crypt_stat->extent_size)), + 8); + } + rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, + page, (extent_offset + * crypt_stat->extent_size), + crypt_stat->extent_size, extent_iv); + if (rc < 0) { + printk(KERN_ERR "%s: Error attempting to encrypt page with " + "page->index = [%ld], extent_offset = [%ld]; " + "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + rc); + goto out; + } + rc = 0; + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " + "rc = [%d]\n", (extent_base + extent_offset), + rc); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " + "encryption:\n"); + ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); } out: return rc; @@ -445,7 +436,9 @@ out: /** * ecryptfs_encrypt_page - * @ctx: The context of the page + * @page: Page mapped from the eCryptfs inode for the file; contains + * decrypted content that needs to be encrypted (to a temporary + * page; not in place) and written out to the lower file * * Encrypt an eCryptfs page. This is done on a per-extent basis. Note * that eCryptfs pages may straddle the lower pages -- for instance, @@ -455,128 +448,122 @@ out: * file, 24K of page 0 of the lower file will be read and decrypted, * and then 8K of page 1 of the lower file will be read and decrypted. * - * The actual operations performed on each page depends on the - * contents of the ecryptfs_page_crypt_context struct. - * * Returns zero on success; negative on error */ -int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx) +int ecryptfs_encrypt_page(struct page *page) { - char extent_iv[ECRYPTFS_MAX_IV_BYTES]; - unsigned long base_extent; - unsigned long extent_offset = 0; - unsigned long lower_page_idx = 0; - unsigned long prior_lower_page_idx = 0; - struct page *lower_page; - struct inode *lower_inode; - struct ecryptfs_inode_info *inode_info; + struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; + char *enc_extent_virt = NULL; + struct page *enc_extent_page; + loff_t extent_offset; int rc = 0; - int lower_byte_offset = 0; - int orig_byte_offset = 0; - int num_extents_per_page; -#define ECRYPTFS_PAGE_STATE_UNREAD 0 -#define ECRYPTFS_PAGE_STATE_READ 1 -#define ECRYPTFS_PAGE_STATE_MODIFIED 2 -#define ECRYPTFS_PAGE_STATE_WRITTEN 3 - int page_state; - - lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host); - inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host); - crypt_stat = &inode_info->crypt_stat; + + ecryptfs_inode = page->mapping->host; + crypt_stat = + &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { - rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode, - ctx->param.lower_file); + rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, + 0, PAGE_CACHE_SIZE); if (rc) - ecryptfs_printk(KERN_ERR, "Error attempting to copy " - "page at index [0x%.16x]\n", - ctx->page->index); + printk(KERN_ERR "%s: Error attempting to copy " + "page at index [%ld]\n", __FUNCTION__, + page->index); goto out; } - num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; - base_extent = (ctx->page->index * num_extents_per_page); - page_state = ECRYPTFS_PAGE_STATE_UNREAD; - while (extent_offset < num_extents_per_page) { - ecryptfs_extent_to_lwr_pg_idx_and_offset( - &lower_page_idx, &lower_byte_offset, crypt_stat, - (base_extent + extent_offset)); - if (prior_lower_page_idx != lower_page_idx - && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) { - rc = ecryptfs_write_out_page(ctx, lower_page, - lower_inode, - orig_byte_offset, - (PAGE_CACHE_SIZE - - orig_byte_offset)); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting " - "to write out page; rc = [%d]" - "\n", rc); - goto out; - } - page_state = ECRYPTFS_PAGE_STATE_WRITTEN; - } - if (page_state == ECRYPTFS_PAGE_STATE_UNREAD - || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) { - rc = ecryptfs_read_in_page(ctx, &lower_page, - lower_inode, lower_page_idx, - lower_byte_offset); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting " - "to read in lower page with " - "index [0x%.16x]; rc = [%d]\n", - lower_page_idx, rc); - goto out; - } - orig_byte_offset = lower_byte_offset; - prior_lower_page_idx = lower_page_idx; - page_state = ECRYPTFS_PAGE_STATE_READ; - } - BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED - || page_state == ECRYPTFS_PAGE_STATE_READ)); - rc = ecryptfs_derive_iv(extent_iv, crypt_stat, - (base_extent + extent_offset)); + enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); + if (!enc_extent_virt) { + rc = -ENOMEM; + ecryptfs_printk(KERN_ERR, "Error allocating memory for " + "encrypted extent\n"); + goto out; + } + enc_extent_page = virt_to_page(enc_extent_virt); + for (extent_offset = 0; + extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); + extent_offset++) { + loff_t offset; + + rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page, + extent_offset); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "derive IV for extent [0x%.16x]; " - "rc = [%d]\n", - (base_extent + extent_offset), rc); + printk(KERN_ERR "%s: Error encrypting extent; " + "rc = [%d]\n", __FUNCTION__, rc); goto out; } - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Encrypting extent " - "with iv:\n"); - ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); - ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " - "encryption:\n"); - ecryptfs_dump_hex((char *) - (page_address(ctx->page) - + (extent_offset - * crypt_stat->extent_size)), 8); - } - rc = ecryptfs_encrypt_page_offset( - crypt_stat, lower_page, lower_byte_offset, ctx->page, - (extent_offset * crypt_stat->extent_size), - crypt_stat->extent_size, extent_iv); - ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " - "rc = [%d]\n", - (base_extent + extent_offset), rc); - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " - "encryption:\n"); - ecryptfs_dump_hex((char *)(page_address(lower_page) - + lower_byte_offset), 8); + ecryptfs_lower_offset_for_extent( + &offset, ((((loff_t)page->index) + * (PAGE_CACHE_SIZE + / crypt_stat->extent_size)) + + extent_offset), crypt_stat); + rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, + offset, crypt_stat->extent_size); + if (rc) { + ecryptfs_printk(KERN_ERR, "Error attempting " + "to write lower page; rc = [%d]" + "\n", rc); + goto out; } - page_state = ECRYPTFS_PAGE_STATE_MODIFIED; extent_offset++; } - BUG_ON(orig_byte_offset != 0); - rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0, - (lower_byte_offset - + crypt_stat->extent_size)); +out: + kfree(enc_extent_virt); + return rc; +} + +static int ecryptfs_decrypt_extent(struct page *page, + struct ecryptfs_crypt_stat *crypt_stat, + struct page *enc_extent_page, + unsigned long extent_offset) +{ + loff_t extent_base; + char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + int rc; + + extent_base = (((loff_t)page->index) + * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); + rc = ecryptfs_derive_iv(extent_iv, crypt_stat, + (extent_base + extent_offset)); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to write out " - "page; rc = [%d]\n", rc); - goto out; + ecryptfs_printk(KERN_ERR, "Error attempting to " + "derive IV for extent [0x%.16x]; " + "rc = [%d]\n", (extent_base + extent_offset), + rc); + goto out; + } + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Decrypting extent " + "with iv:\n"); + ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " + "decryption:\n"); + ecryptfs_dump_hex((char *) + (page_address(enc_extent_page) + + (extent_offset * crypt_stat->extent_size)), + 8); + } + rc = ecryptfs_decrypt_page_offset(crypt_stat, page, + (extent_offset + * crypt_stat->extent_size), + enc_extent_page, 0, + crypt_stat->extent_size, extent_iv); + if (rc < 0) { + printk(KERN_ERR "%s: Error attempting to decrypt to page with " + "page->index = [%ld], extent_offset = [%ld]; " + "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + rc); + goto out; + } + rc = 0; + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; " + "rc = [%d]\n", (extent_base + extent_offset), + rc); + ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " + "decryption:\n"); + ecryptfs_dump_hex((char *)(page_address(page) + + (extent_offset + * crypt_stat->extent_size)), 8); } out: return rc; @@ -584,8 +571,9 @@ out: /** * ecryptfs_decrypt_page - * @file: The ecryptfs file - * @page: The page in ecryptfs to decrypt + * @page: Page mapped from the eCryptfs inode for the file; data read + * and decrypted from the lower file will be written into this + * page * * Decrypt an eCryptfs page. This is done on a per-extent basis. Note * that eCryptfs pages may straddle the lower pages -- for instance, @@ -597,108 +585,75 @@ out: * * Returns zero on success; negative on error */ -int ecryptfs_decrypt_page(struct file *file, struct page *page) +int ecryptfs_decrypt_page(struct page *page) { - char extent_iv[ECRYPTFS_MAX_IV_BYTES]; - unsigned long base_extent; - unsigned long extent_offset = 0; - unsigned long lower_page_idx = 0; - unsigned long prior_lower_page_idx = 0; - struct page *lower_page; - char *lower_page_virt = NULL; - struct inode *lower_inode; + struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; + char *enc_extent_virt = NULL; + struct page *enc_extent_page; + unsigned long extent_offset; int rc = 0; - int byte_offset; - int num_extents_per_page; - int page_state; - crypt_stat = &(ecryptfs_inode_to_private( - page->mapping->host)->crypt_stat); - lower_inode = ecryptfs_inode_to_lower(page->mapping->host); + ecryptfs_inode = page->mapping->host; + crypt_stat = + &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { - rc = ecryptfs_do_readpage(file, page, page->index); + rc = ecryptfs_read_lower_page_segment(page, page->index, 0, + PAGE_CACHE_SIZE, + ecryptfs_inode); if (rc) - ecryptfs_printk(KERN_ERR, "Error attempting to copy " - "page at index [0x%.16x]\n", - page->index); + printk(KERN_ERR "%s: Error attempting to copy " + "page at index [%ld]\n", __FUNCTION__, + page->index); goto out; } - num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; - base_extent = (page->index * num_extents_per_page); - lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache, - GFP_KERNEL); - if (!lower_page_virt) { + enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); + if (!enc_extent_virt) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Error getting page for encrypted " - "lower page(s)\n"); + ecryptfs_printk(KERN_ERR, "Error allocating memory for " + "encrypted extent\n"); goto out; } - lower_page = virt_to_page(lower_page_virt); - page_state = ECRYPTFS_PAGE_STATE_UNREAD; - while (extent_offset < num_extents_per_page) { - ecryptfs_extent_to_lwr_pg_idx_and_offset( - &lower_page_idx, &byte_offset, crypt_stat, - (base_extent + extent_offset)); - if (prior_lower_page_idx != lower_page_idx - || page_state == ECRYPTFS_PAGE_STATE_UNREAD) { - rc = ecryptfs_do_readpage(file, lower_page, - lower_page_idx); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error reading " - "lower encrypted page; rc = " - "[%d]\n", rc); - goto out; - } - prior_lower_page_idx = lower_page_idx; - page_state = ECRYPTFS_PAGE_STATE_READ; - } - rc = ecryptfs_derive_iv(extent_iv, crypt_stat, - (base_extent + extent_offset)); + enc_extent_page = virt_to_page(enc_extent_virt); + for (extent_offset = 0; + extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); + extent_offset++) { + loff_t offset; + + ecryptfs_lower_offset_for_extent( + &offset, ((page->index * (PAGE_CACHE_SIZE + / crypt_stat->extent_size)) + + extent_offset), crypt_stat); + rc = ecryptfs_read_lower(enc_extent_virt, offset, + crypt_stat->extent_size, + ecryptfs_inode); if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "derive IV for extent [0x%.16x]; rc = " - "[%d]\n", - (base_extent + extent_offset), rc); + ecryptfs_printk(KERN_ERR, "Error attempting " + "to read lower page; rc = [%d]" + "\n", rc); goto out; } - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Decrypting extent " - "with iv:\n"); - ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); - ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " - "decryption:\n"); - ecryptfs_dump_hex((lower_page_virt + byte_offset), 8); - } - rc = ecryptfs_decrypt_page_offset(crypt_stat, page, - (extent_offset - * crypt_stat->extent_size), - lower_page, byte_offset, - crypt_stat->extent_size, - extent_iv); - if (rc != crypt_stat->extent_size) { - ecryptfs_printk(KERN_ERR, "Error attempting to " - "decrypt extent [0x%.16x]\n", - (base_extent + extent_offset)); + rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page, + extent_offset); + if (rc) { + printk(KERN_ERR "%s: Error encrypting extent; " + "rc = [%d]\n", __FUNCTION__, rc); goto out; } - rc = 0; - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " - "decryption:\n"); - ecryptfs_dump_hex((char *)(page_address(page) - + byte_offset), 8); - } extent_offset++; } out: - if (lower_page_virt) - kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt); + kfree(enc_extent_virt); return rc; } /** * decrypt_scatterlist + * @crypt_stat: Cryptographic context + * @dest_sg: The destination scatterlist to decrypt into + * @src_sg: The source scatterlist to decrypt from + * @size: The number of bytes to decrypt + * @iv: The initialization vector to use for the decryption * * Returns the number of bytes decrypted; negative value on error */ @@ -740,6 +695,13 @@ out: /** * ecryptfs_encrypt_page_offset + * @crypt_stat: The cryptographic context + * @dst_page: The page to encrypt into + * @dst_offset: The offset in the page to encrypt into + * @src_page: The page to encrypt from + * @src_offset: The offset in the page to encrypt from + * @size: The number of bytes to encrypt + * @iv: The initialization vector to use for the encryption * * Returns the number of bytes encrypted */ @@ -762,6 +724,13 @@ ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, /** * ecryptfs_decrypt_page_offset + * @crypt_stat: The cryptographic context + * @dst_page: The page to decrypt into + * @dst_offset: The offset in the page to decrypt into + * @src_page: The page to decrypt from + * @src_offset: The offset in the page to decrypt from + * @size: The number of bytes to decrypt + * @iv: The initialization vector to use for the decryption * * Returns the number of bytes decrypted */ @@ -857,15 +826,17 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; set_extent_mask_and_shift(crypt_stat); crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; - if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) { - crypt_stat->header_extent_size = - ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; - } else - crypt_stat->header_extent_size = PAGE_CACHE_SIZE; if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) crypt_stat->num_header_extents_at_front = 0; - else - crypt_stat->num_header_extents_at_front = 1; + else { + if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) + crypt_stat->num_header_extents_at_front = + (ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE + / crypt_stat->extent_size); + else + crypt_stat->num_header_extents_at_front = + (PAGE_CACHE_SIZE / crypt_stat->extent_size); + } } /** @@ -917,6 +888,8 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat) /** * ecryptfs_copy_mount_wide_flags_to_inode_flags + * @crypt_stat: The inode's cryptographic context + * @mount_crypt_stat: The mount point's cryptographic context * * This function propagates the mount-wide flags to individual inode * flags. @@ -931,9 +904,34 @@ static void ecryptfs_copy_mount_wide_flags_to_inode_flags( crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED; } +static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs( + struct ecryptfs_crypt_stat *crypt_stat, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + struct ecryptfs_global_auth_tok *global_auth_tok; + int rc = 0; + + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(global_auth_tok, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig); + if (rc) { + printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc); + mutex_unlock( + &mount_crypt_stat->global_auth_tok_list_mutex); + goto out; + } + } + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); +out: + return rc; +} + /** * ecryptfs_set_default_crypt_stat_vals - * @crypt_stat + * @crypt_stat: The inode's cryptographic context + * @mount_crypt_stat: The mount point's cryptographic context * * Default values in the event that policy does not override them. */ @@ -953,7 +951,7 @@ static void ecryptfs_set_default_crypt_stat_vals( /** * ecryptfs_new_file_context - * @ecryptfs_dentry + * @ecryptfs_dentry: The eCryptfs dentry * * If the crypto context for the file has not yet been established, * this is where we do that. Establishing a new crypto context @@ -970,49 +968,42 @@ static void ecryptfs_set_default_crypt_stat_vals( * * Returns zero on success; non-zero otherwise */ -/* Associate an authentication token(s) with the file */ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry) { - int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; int cipher_name_len; + int rc = 0; ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat); - /* See if there are mount crypt options */ - if (mount_crypt_stat->global_auth_tok) { - ecryptfs_printk(KERN_DEBUG, "Initializing context for new " - "file using mount_crypt_stat\n"); - crypt_stat->flags |= ECRYPTFS_ENCRYPTED; - crypt_stat->flags |= ECRYPTFS_KEY_VALID; - ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, - mount_crypt_stat); - memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++], - mount_crypt_stat->global_auth_tok_sig, - ECRYPTFS_SIG_SIZE_HEX); - cipher_name_len = - strlen(mount_crypt_stat->global_default_cipher_name); - memcpy(crypt_stat->cipher, - mount_crypt_stat->global_default_cipher_name, - cipher_name_len); - crypt_stat->cipher[cipher_name_len] = '\0'; - crypt_stat->key_size = - mount_crypt_stat->global_default_cipher_key_size; - ecryptfs_generate_new_key(crypt_stat); - } else - /* We should not encounter this scenario since we - * should detect lack of global_auth_tok at mount time - * TODO: Applies to 0.1 release only; remove in future - * release */ - BUG(); + crypt_stat->flags |= (ECRYPTFS_ENCRYPTED | ECRYPTFS_KEY_VALID); + ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, + mount_crypt_stat); + rc = ecryptfs_copy_mount_wide_sigs_to_inode_sigs(crypt_stat, + mount_crypt_stat); + if (rc) { + printk(KERN_ERR "Error attempting to copy mount-wide key sigs " + "to the inode key sigs; rc = [%d]\n", rc); + goto out; + } + cipher_name_len = + strlen(mount_crypt_stat->global_default_cipher_name); + memcpy(crypt_stat->cipher, + mount_crypt_stat->global_default_cipher_name, + cipher_name_len); + crypt_stat->cipher[cipher_name_len] = '\0'; + crypt_stat->key_size = + mount_crypt_stat->global_default_cipher_key_size; + ecryptfs_generate_new_key(crypt_stat); rc = ecryptfs_init_crypt_ctx(crypt_stat); if (rc) ecryptfs_printk(KERN_ERR, "Error initializing cryptographic " "context for cipher [%s]: rc = [%d]\n", crypt_stat->cipher, rc); +out: return rc; } @@ -1054,7 +1045,7 @@ static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = { /** * ecryptfs_process_flags - * @crypt_stat + * @crypt_stat: The cryptographic context * @page_virt: Source data to be parsed * @bytes_read: Updated with the number of bytes read * @@ -1142,7 +1133,7 @@ ecryptfs_cipher_code_str_map[] = { /** * ecryptfs_code_for_cipher_string - * @str: The string representing the cipher name + * @crypt_stat: The cryptographic context * * Returns zero on no match, or the cipher code on match */ @@ -1198,59 +1189,28 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code) return rc; } -/** - * ecryptfs_read_header_region - * @data - * @dentry - * @nd - * - * Returns zero on success; non-zero otherwise - */ -static int ecryptfs_read_header_region(char *data, struct dentry *dentry, - struct vfsmount *mnt) +int ecryptfs_read_and_validate_header_region(char *data, + struct inode *ecryptfs_inode) { - struct file *lower_file; - mm_segment_t oldfs; + struct ecryptfs_crypt_stat *crypt_stat = + &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); int rc; - if ((rc = ecryptfs_open_lower_file(&lower_file, dentry, mnt, - O_RDONLY))) { - printk(KERN_ERR - "Error opening lower_file to read header region\n"); - goto out; - } - lower_file->f_pos = 0; - oldfs = get_fs(); - set_fs(get_ds()); - /* For releases 0.1 and 0.2, all of the header information - * fits in the first data extent-sized region. */ - rc = lower_file->f_op->read(lower_file, (char __user *)data, - ECRYPTFS_DEFAULT_EXTENT_SIZE, &lower_file->f_pos); - set_fs(oldfs); - if ((rc = ecryptfs_close_lower_file(lower_file))) { - printk(KERN_ERR "Error closing lower_file\n"); + rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, + ecryptfs_inode); + if (rc) { + printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", + __FUNCTION__, rc); goto out; } - rc = 0; -out: - return rc; -} - -int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry, - struct vfsmount *mnt) -{ - int rc; - - rc = ecryptfs_read_header_region(data, dentry, mnt); - if (rc) - goto out; - if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) + if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { rc = -EINVAL; + ecryptfs_printk(KERN_DEBUG, "Valid marker not found\n"); + } out: return rc; } - void ecryptfs_write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, @@ -1259,7 +1219,7 @@ ecryptfs_write_header_metadata(char *virt, u32 header_extent_size; u16 num_header_extents_at_front; - header_extent_size = (u32)crypt_stat->header_extent_size; + header_extent_size = (u32)crypt_stat->extent_size; num_header_extents_at_front = (u16)crypt_stat->num_header_extents_at_front; header_extent_size = cpu_to_be32(header_extent_size); @@ -1276,9 +1236,10 @@ struct kmem_cache *ecryptfs_header_cache_2; /** * ecryptfs_write_headers_virt - * @page_virt - * @crypt_stat - * @ecryptfs_dentry + * @page_virt: The virtual address to write the headers to + * @size: Set to the number of bytes written by this function + * @crypt_stat: The cryptographic context + * @ecryptfs_dentry: The eCryptfs dentry * * Format version: 1 * @@ -1332,53 +1293,50 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, return rc; } -static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, - struct file *lower_file, - char *page_virt) +static int +ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, + struct dentry *ecryptfs_dentry, + char *page_virt) { - mm_segment_t oldfs; int current_header_page; int header_pages; - ssize_t size; - int rc = 0; + int rc; - lower_file->f_pos = 0; - oldfs = get_fs(); - set_fs(get_ds()); - size = vfs_write(lower_file, (char __user *)page_virt, PAGE_CACHE_SIZE, - &lower_file->f_pos); - if (size < 0) { - rc = (int)size; - printk(KERN_ERR "Error attempting to write lower page; " - "rc = [%d]\n", rc); - set_fs(oldfs); + rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt, + 0, PAGE_CACHE_SIZE); + if (rc) { + printk(KERN_ERR "%s: Error attempting to write header " + "information to lower file; rc = [%d]\n", __FUNCTION__, + rc); goto out; } - header_pages = ((crypt_stat->header_extent_size + header_pages = ((crypt_stat->extent_size * crypt_stat->num_header_extents_at_front) / PAGE_CACHE_SIZE); memset(page_virt, 0, PAGE_CACHE_SIZE); current_header_page = 1; while (current_header_page < header_pages) { - size = vfs_write(lower_file, (char __user *)page_virt, - PAGE_CACHE_SIZE, &lower_file->f_pos); - if (size < 0) { - rc = (int)size; - printk(KERN_ERR "Error attempting to write lower page; " - "rc = [%d]\n", rc); - set_fs(oldfs); + loff_t offset; + + offset = (((loff_t)current_header_page) << PAGE_CACHE_SHIFT); + if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, + page_virt, offset, + PAGE_CACHE_SIZE))) { + printk(KERN_ERR "%s: Error attempting to write header " + "information to lower file; rc = [%d]\n", + __FUNCTION__, rc); goto out; } current_header_page++; } - set_fs(oldfs); out: return rc; } -static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, - struct ecryptfs_crypt_stat *crypt_stat, - char *page_virt, size_t size) +static int +ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, + struct ecryptfs_crypt_stat *crypt_stat, + char *page_virt, size_t size) { int rc; @@ -1389,7 +1347,7 @@ static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, /** * ecryptfs_write_metadata - * @lower_file: The lower file struct, which was returned from dentry_open + * @ecryptfs_dentry: The eCryptfs dentry * * Write the file headers out. This will likely involve a userspace * callout, in which the session key is encrypted with one or more @@ -1397,22 +1355,21 @@ static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, * retrieved via a prompt. Exactly what happens at this point should * be policy-dependent. * + * TODO: Support header information spanning multiple pages + * * Returns zero on success; non-zero on error */ -int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, - struct file *lower_file) +int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) { - struct ecryptfs_crypt_stat *crypt_stat; + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; char *page_virt; - size_t size; + size_t size = 0; int rc = 0; - crypt_stat = &ecryptfs_inode_to_private( - ecryptfs_dentry->d_inode)->crypt_stat; if (likely(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { - ecryptfs_printk(KERN_DEBUG, "Key is " - "invalid; bailing out\n"); + printk(KERN_ERR "Key is invalid; bailing out\n"); rc = -EINVAL; goto out; } @@ -1441,7 +1398,8 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, crypt_stat, page_virt, size); else - rc = ecryptfs_write_metadata_to_contents(crypt_stat, lower_file, + rc = ecryptfs_write_metadata_to_contents(crypt_stat, + ecryptfs_dentry, page_virt); if (rc) { printk(KERN_ERR "Error writing metadata out to lower file; " @@ -1464,28 +1422,28 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, u32 header_extent_size; u16 num_header_extents_at_front; - memcpy(&header_extent_size, virt, 4); + memcpy(&header_extent_size, virt, sizeof(u32)); header_extent_size = be32_to_cpu(header_extent_size); - virt += 4; - memcpy(&num_header_extents_at_front, virt, 2); + virt += sizeof(u32); + memcpy(&num_header_extents_at_front, virt, sizeof(u16)); num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front); - crypt_stat->header_extent_size = (int)header_extent_size; crypt_stat->num_header_extents_at_front = (int)num_header_extents_at_front; - (*bytes_read) = 6; + (*bytes_read) = (sizeof(u32) + sizeof(u16)); if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) - && ((crypt_stat->header_extent_size + && ((crypt_stat->extent_size * crypt_stat->num_header_extents_at_front) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Invalid header extent size: " - "[%d]\n", crypt_stat->header_extent_size); + printk(KERN_WARNING "Invalid number of header extents: [%zd]\n", + crypt_stat->num_header_extents_at_front); } return rc; } /** * set_default_header_data + * @crypt_stat: The cryptographic context * * For version 0 file format; this function is only for backwards * compatibility for files created with the prior versions of @@ -1493,12 +1451,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, */ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) { - crypt_stat->header_extent_size = 4096; - crypt_stat->num_header_extents_at_front = 1; + crypt_stat->num_header_extents_at_front = 2; } /** * ecryptfs_read_headers_virt + * @page_virt: The virtual address into which to read the headers + * @crypt_stat: The cryptographic context + * @ecryptfs_dentry: The eCryptfs dentry + * @validate_header_size: Whether to validate the header size while reading * * Read/parse the header data. The header format is detailed in the * comment block for the ecryptfs_write_headers_virt() function. @@ -1558,19 +1519,25 @@ out: /** * ecryptfs_read_xattr_region + * @page_virt: The vitual address into which to read the xattr data + * @ecryptfs_inode: The eCryptfs inode * * Attempts to read the crypto metadata from the extended attribute * region of the lower file. + * + * Returns zero on success; non-zero on error */ -int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry) +int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode) { + struct dentry *lower_dentry = + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; ssize_t size; int rc = 0; - size = ecryptfs_getxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME, - page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); + size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME, + page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); if (size < 0) { - printk(KERN_DEBUG "Error attempting to read the [%s] " + printk(KERN_ERR "Error attempting to read the [%s] " "xattr from the lower file; return value = [%zd]\n", ECRYPTFS_XATTR_NAME, size); rc = -EINVAL; @@ -1585,7 +1552,7 @@ int ecryptfs_read_and_validate_xattr_region(char *page_virt, { int rc; - rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry); + rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode); if (rc) goto out; if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) { @@ -1609,15 +1576,13 @@ out: * * Returns zero if valid headers found and parsed; non-zero otherwise */ -int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry, - struct file *lower_file) +int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) { int rc = 0; char *page_virt = NULL; - mm_segment_t oldfs; - ssize_t bytes_read; + struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; struct ecryptfs_crypt_stat *crypt_stat = - &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; + &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; @@ -1628,27 +1593,18 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry, page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); if (!page_virt) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n"); + printk(KERN_ERR "%s: Unable to allocate page_virt\n", + __FUNCTION__); goto out; } - lower_file->f_pos = 0; - oldfs = get_fs(); - set_fs(get_ds()); - bytes_read = lower_file->f_op->read(lower_file, - (char __user *)page_virt, - ECRYPTFS_DEFAULT_EXTENT_SIZE, - &lower_file->f_pos); - set_fs(oldfs); - if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) { - rc = -EINVAL; - goto out; - } - rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, - ecryptfs_dentry, - ECRYPTFS_VALIDATE_HEADER_SIZE); + rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, + ecryptfs_inode); + if (!rc) + rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, + ecryptfs_dentry, + ECRYPTFS_VALIDATE_HEADER_SIZE); if (rc) { - rc = ecryptfs_read_xattr_region(page_virt, - ecryptfs_dentry); + rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); if (rc) { printk(KERN_DEBUG "Valid eCryptfs headers not found in " "file header region or xattr region\n"); @@ -1776,7 +1732,7 @@ out: } /** - * ecryptfs_process_cipher - Perform cipher initialization. + * ecryptfs_process_key_cipher - Perform key cipher initialization. * @key_tfm: Crypto context for key material, set by this function * @cipher_name: Name of the cipher * @key_size: Size of the key in bytes @@ -1785,9 +1741,9 @@ out: * should be released by other functions, such as on a superblock put * event, regardless of whether this function succeeds for fails. */ -int -ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, - size_t *key_size) +static int +ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm, + char *cipher_name, size_t *key_size) { char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; char *full_alg_name; @@ -1829,3 +1785,100 @@ ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, out: return rc; } + +struct kmem_cache *ecryptfs_key_tfm_cache; +struct list_head key_tfm_list; +struct mutex key_tfm_list_mutex; + +int ecryptfs_init_crypto(void) +{ + mutex_init(&key_tfm_list_mutex); + INIT_LIST_HEAD(&key_tfm_list); + return 0; +} + +int ecryptfs_destroy_crypto(void) +{ + struct ecryptfs_key_tfm *key_tfm, *key_tfm_tmp; + + mutex_lock(&key_tfm_list_mutex); + list_for_each_entry_safe(key_tfm, key_tfm_tmp, &key_tfm_list, + key_tfm_list) { + list_del(&key_tfm->key_tfm_list); + if (key_tfm->key_tfm) + crypto_free_blkcipher(key_tfm->key_tfm); + kmem_cache_free(ecryptfs_key_tfm_cache, key_tfm); + } + mutex_unlock(&key_tfm_list_mutex); + return 0; +} + +int +ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, + size_t key_size) +{ + struct ecryptfs_key_tfm *tmp_tfm; + int rc = 0; + + tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL); + if (key_tfm != NULL) + (*key_tfm) = tmp_tfm; + if (!tmp_tfm) { + rc = -ENOMEM; + printk(KERN_ERR "Error attempting to allocate from " + "ecryptfs_key_tfm_cache\n"); + goto out; + } + mutex_init(&tmp_tfm->key_tfm_mutex); + strncpy(tmp_tfm->cipher_name, cipher_name, + ECRYPTFS_MAX_CIPHER_NAME_SIZE); + tmp_tfm->key_size = key_size; + rc = ecryptfs_process_key_cipher(&tmp_tfm->key_tfm, + tmp_tfm->cipher_name, + &tmp_tfm->key_size); + if (rc) { + printk(KERN_ERR "Error attempting to initialize key TFM " + "cipher with name = [%s]; rc = [%d]\n", + tmp_tfm->cipher_name, rc); + kmem_cache_free(ecryptfs_key_tfm_cache, tmp_tfm); + if (key_tfm != NULL) + (*key_tfm) = NULL; + goto out; + } + mutex_lock(&key_tfm_list_mutex); + list_add(&tmp_tfm->key_tfm_list, &key_tfm_list); + mutex_unlock(&key_tfm_list_mutex); +out: + return rc; +} + +int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, + struct mutex **tfm_mutex, + char *cipher_name) +{ + struct ecryptfs_key_tfm *key_tfm; + int rc = 0; + + (*tfm) = NULL; + (*tfm_mutex) = NULL; + mutex_lock(&key_tfm_list_mutex); + list_for_each_entry(key_tfm, &key_tfm_list, key_tfm_list) { + if (strcmp(key_tfm->cipher_name, cipher_name) == 0) { + (*tfm) = key_tfm->key_tfm; + (*tfm_mutex) = &key_tfm->key_tfm_mutex; + mutex_unlock(&key_tfm_list_mutex); + goto out; + } + } + mutex_unlock(&key_tfm_list_mutex); + rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0); + if (rc) { + printk(KERN_ERR "Error adding new key_tfm to list; rc = [%d]\n", + rc); + goto out; + } + (*tfm) = key_tfm->key_tfm; + (*tfm_mutex) = &key_tfm->key_tfm_mutex; +out: + return rc; +} diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c index 434c7efd80f..3d2bdf546ec 100644 --- a/fs/ecryptfs/debug.c +++ b/fs/ecryptfs/debug.c @@ -38,8 +38,6 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok) auth_tok); if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) { ecryptfs_printk(KERN_DEBUG, " * private key type\n"); - ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT " - "IN ECRYPTFS VERSION 0.1)\n"); } else { ecryptfs_printk(KERN_DEBUG, " * passphrase type\n"); ecryptfs_to_hex(salt, auth_tok->token.password.salt, diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 1b9dd9a96f1..ce7a5d4aec3 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -38,7 +38,7 @@ /* Version verification for shared data structures w/ userspace */ #define ECRYPTFS_VERSION_MAJOR 0x00 #define ECRYPTFS_VERSION_MINOR 0x04 -#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x02 +#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 /* These flags indicate which features are supported by the kernel * module; userspace tools such as the mount helper read * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine @@ -48,10 +48,12 @@ #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 #define ECRYPTFS_VERSIONING_POLICY 0x00000008 #define ECRYPTFS_VERSIONING_XATTR 0x00000010 +#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ | ECRYPTFS_VERSIONING_PUBKEY \ - | ECRYPTFS_VERSIONING_XATTR) + | ECRYPTFS_VERSIONING_XATTR \ + | ECRYPTFS_VERSIONING_MULTKEY) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 @@ -65,8 +67,7 @@ #define ECRYPTFS_MAX_KEY_BYTES 64 #define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512 #define ECRYPTFS_DEFAULT_IV_BYTES 16 -#define ECRYPTFS_FILE_VERSION 0x02 -#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192 +#define ECRYPTFS_FILE_VERSION 0x03 #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096 #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192 #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 @@ -144,6 +145,7 @@ struct ecryptfs_private_key { struct ecryptfs_auth_tok { u16 version; /* 8-bit major and 8-bit minor */ u16 token_type; +#define ECRYPTFS_ENCRYPT_ONLY 0x00000001 u32 flags; struct ecryptfs_session_key session_key; u8 reserved[32]; @@ -194,12 +196,11 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_MAX_KEYSET_SIZE 1024 #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 -#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */ #define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */ #define ECRYPTFS_SALT_BYTES 2 #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5 #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */ -#define ECRYPTFS_FILE_SIZE_BYTES 8 +#define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64)) #define ECRYPTFS_DEFAULT_CIPHER "aes" #define ECRYPTFS_DEFAULT_KEY_BYTES 16 #define ECRYPTFS_DEFAULT_HASH "md5" @@ -212,6 +213,11 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_TAG_67_PACKET_TYPE 0x43 #define MD5_DIGEST_SIZE 16 +struct ecryptfs_key_sig { + struct list_head crypt_stat_list; + char keysig[ECRYPTFS_SIG_SIZE_HEX]; +}; + /** * This is the primary struct associated with each encrypted file. * @@ -231,8 +237,6 @@ struct ecryptfs_crypt_stat { u32 flags; unsigned int file_version; size_t iv_bytes; - size_t num_keysigs; - size_t header_extent_size; size_t num_header_extents_at_front; size_t extent_size; /* Data extent size; default is 4096 */ size_t key_size; @@ -245,7 +249,8 @@ struct ecryptfs_crypt_stat { unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; - unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX]; + struct list_head keysig_list; + struct mutex keysig_list_mutex; struct mutex cs_tfm_mutex; struct mutex cs_hash_tfm_mutex; struct mutex cs_mutex; @@ -255,6 +260,8 @@ struct ecryptfs_crypt_stat { struct ecryptfs_inode_info { struct inode vfs_inode; struct inode *wii_inode; + struct file *lower_file; + struct mutex lower_file_mutex; struct ecryptfs_crypt_stat crypt_stat; }; @@ -266,6 +273,59 @@ struct ecryptfs_dentry_info { }; /** + * ecryptfs_global_auth_tok - A key used to encrypt all new files under the mountpoint + * @flags: Status flags + * @mount_crypt_stat_list: These auth_toks hang off the mount-wide + * cryptographic context. Every time a new + * inode comes into existence, eCryptfs copies + * the auth_toks on that list to the set of + * auth_toks on the inode's crypt_stat + * @global_auth_tok_key: The key from the user's keyring for the sig + * @global_auth_tok: The key contents + * @sig: The key identifier + * + * ecryptfs_global_auth_tok structs refer to authentication token keys + * in the user keyring that apply to newly created files. A list of + * these objects hangs off of the mount_crypt_stat struct for any + * given eCryptfs mount. This struct maintains a reference to both the + * key contents and the key itself so that the key can be put on + * unmount. + */ +struct ecryptfs_global_auth_tok { +#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001 + u32 flags; + struct list_head mount_crypt_stat_list; + struct key *global_auth_tok_key; + struct ecryptfs_auth_tok *global_auth_tok; + unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1]; +}; + +/** + * ecryptfs_key_tfm - Persistent key tfm + * @key_tfm: crypto API handle to the key + * @key_size: Key size in bytes + * @key_tfm_mutex: Mutex to ensure only one operation in eCryptfs is + * using the persistent TFM at any point in time + * @key_tfm_list: Handle to hang this off the module-wide TFM list + * @cipher_name: String name for the cipher for this TFM + * + * Typically, eCryptfs will use the same ciphers repeatedly throughout + * the course of its operations. In order to avoid unnecessarily + * destroying and initializing the same cipher repeatedly, eCryptfs + * keeps a list of crypto API contexts around to use when needed. + */ +struct ecryptfs_key_tfm { + struct crypto_blkcipher *key_tfm; + size_t key_size; + struct mutex key_tfm_mutex; + struct list_head key_tfm_list; + unsigned char cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; +}; + +extern struct list_head key_tfm_list; +extern struct mutex key_tfm_list_mutex; + +/** * This struct is to enable a mount-wide passphrase/salt combo. This * is more or less a stopgap to provide similar functionality to other * crypto filesystems like EncFS or CFS until full policy support is @@ -276,15 +336,14 @@ struct ecryptfs_mount_crypt_stat { #define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001 #define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002 #define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004 +#define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED 0x00000008 u32 flags; - struct ecryptfs_auth_tok *global_auth_tok; - struct key *global_auth_tok_key; + struct list_head global_auth_tok_list; + struct mutex global_auth_tok_list_mutex; + size_t num_global_auth_toks; size_t global_default_cipher_key_size; - struct crypto_blkcipher *global_key_tfm; - struct mutex global_key_tfm_mutex; unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; - unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1]; }; /* superblock private data. */ @@ -468,6 +527,9 @@ extern struct kmem_cache *ecryptfs_header_cache_2; extern struct kmem_cache *ecryptfs_xattr_cache; extern struct kmem_cache *ecryptfs_lower_page_cache; extern struct kmem_cache *ecryptfs_key_record_cache; +extern struct kmem_cache *ecryptfs_key_sig_cache; +extern struct kmem_cache *ecryptfs_global_auth_tok_cache; +extern struct kmem_cache *ecryptfs_key_tfm_cache; int ecryptfs_interpose(struct dentry *hidden_dentry, struct dentry *this_dentry, struct super_block *sb, @@ -486,44 +548,18 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat); void ecryptfs_rotate_iv(unsigned char *iv); void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); -void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); -void ecryptfs_destruct_mount_crypt_stat( +void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); +void ecryptfs_destroy_mount_crypt_stat( struct ecryptfs_mount_crypt_stat *mount_crypt_stat); int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat); -int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, - char *cipher_name, - char *chaining_modifier); -#define ECRYPTFS_LOWER_I_MUTEX_NOT_HELD 0 -#define ECRYPTFS_LOWER_I_MUTEX_HELD 1 -int ecryptfs_write_inode_size_to_metadata(struct file *lower_file, - struct inode *lower_inode, - struct inode *inode, - struct dentry *ecryptfs_dentry, - int lower_i_mutex_held); -int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, - struct file *lower_file, - unsigned long lower_page_index, int byte_offset, - int region_bytes); -int -ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, - struct file *lower_file, int byte_offset, - int region_size); -int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, - struct file *lower_file); -int ecryptfs_do_readpage(struct file *file, struct page *page, - pgoff_t lower_page_index); -int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, - struct inode *lower_inode, - struct writeback_control *wbc); -int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx); -int ecryptfs_decrypt_page(struct file *file, struct page *page); -int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry, - struct file *lower_file); -int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry, - struct file *lower_file); +int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode); +int ecryptfs_encrypt_page(struct page *page); +int ecryptfs_decrypt_page(struct page *page); +int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry); +int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry); int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); -int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry, - struct vfsmount *mnt); +int ecryptfs_read_and_validate_header_region(char *data, + struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry); u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); @@ -533,27 +569,22 @@ int ecryptfs_generate_key_packet_set(char *dest_base, struct ecryptfs_crypt_stat *crypt_stat, struct dentry *ecryptfs_dentry, size_t *len, size_t max); -int process_request_key_err(long err_code); int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *src, struct dentry *ecryptfs_dentry); int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); -int -ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, - size_t *key_size); int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); int ecryptfs_inode_set(struct inode *inode, void *lower_inode); void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); -int ecryptfs_open_lower_file(struct file **lower_file, - struct dentry *lower_dentry, - struct vfsmount *lower_mnt, int flags); -int ecryptfs_close_lower_file(struct file *lower_file); ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size); +ssize_t +ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, + void *value, size_t size); int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry); +int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); int ecryptfs_process_quit(uid_t uid, pid_t pid); int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, @@ -580,7 +611,43 @@ void ecryptfs_write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat, size_t *written); +int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig); +int +ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *sig); +int ecryptfs_get_global_auth_tok_for_sig( + struct ecryptfs_global_auth_tok **global_auth_tok, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig); +int +ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, + size_t key_size); +int ecryptfs_init_crypto(void); +int ecryptfs_destroy_crypto(void); +int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, + struct mutex **tfm_mutex, + char *cipher_name); +int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, + struct ecryptfs_auth_tok **auth_tok, + char *sig); int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); +void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, + struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, + loff_t offset, size_t size); +int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, + struct page *page_for_lower, + size_t offset_in_page, size_t size); +int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, + size_t size); +int ecryptfs_read_lower(char *data, loff_t offset, size_t size, + struct inode *ecryptfs_inode); +int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, + pgoff_t page_index, + size_t offset_in_page, size_t size, + struct inode *ecryptfs_inode); +int ecryptfs_read(char *data, loff_t offset, size_t size, + struct file *ecryptfs_file); +struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 94f456fe4d9..c98c4690a77 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -141,34 +141,6 @@ retry: struct kmem_cache *ecryptfs_file_info_cache; -int ecryptfs_open_lower_file(struct file **lower_file, - struct dentry *lower_dentry, - struct vfsmount *lower_mnt, int flags) -{ - int rc = 0; - - flags |= O_LARGEFILE; - dget(lower_dentry); - mntget(lower_mnt); - *lower_file = dentry_open(lower_dentry, lower_mnt, flags); - if (IS_ERR(*lower_file)) { - printk(KERN_ERR "Error opening lower file for lower_dentry " - "[0x%p], lower_mnt [0x%p], and flags [0x%x]\n", - lower_dentry, lower_mnt, flags); - rc = PTR_ERR(*lower_file); - *lower_file = NULL; - goto out; - } -out: - return rc; -} - -int ecryptfs_close_lower_file(struct file *lower_file) -{ - fput(lower_file); - return 0; -} - /** * ecryptfs_open * @inode: inode speciying file to open @@ -187,11 +159,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); - struct inode *lower_inode = NULL; - struct file *lower_file = NULL; - struct vfsmount *lower_mnt; struct ecryptfs_file_info *file_info; - int lower_flags; mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; @@ -219,25 +187,12 @@ static int ecryptfs_open(struct inode *inode, struct file *file) if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) { ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n"); /* Policy code enabled in future release */ - crypt_stat->flags |= ECRYPTFS_POLICY_APPLIED; - crypt_stat->flags |= ECRYPTFS_ENCRYPTED; + crypt_stat->flags |= (ECRYPTFS_POLICY_APPLIED + | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - lower_flags = file->f_flags; - if ((lower_flags & O_ACCMODE) == O_WRONLY) - lower_flags = (lower_flags & O_ACCMODE) | O_RDWR; - if (file->f_flags & O_APPEND) - lower_flags &= ~O_APPEND; - lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); - /* Corresponding fput() in ecryptfs_release() */ - if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, - lower_flags))) { - ecryptfs_printk(KERN_ERR, "Error opening lower file\n"); - goto out_puts; - } - ecryptfs_set_file_lower(file, lower_file); - /* Isn't this check the same as the one in lookup? */ - lower_inode = lower_dentry->d_inode; + ecryptfs_set_file_lower( + file, ecryptfs_inode_to_private(inode)->lower_file); if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); @@ -247,7 +202,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) mutex_lock(&crypt_stat->cs_mutex); if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { - rc = ecryptfs_read_metadata(ecryptfs_dentry, lower_file); + rc = ecryptfs_read_metadata(ecryptfs_dentry); if (rc) { ecryptfs_printk(KERN_DEBUG, "Valid headers not found\n"); @@ -259,7 +214,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) "and plaintext passthrough mode is not " "enabled; returning -EIO\n"); mutex_unlock(&crypt_stat->cs_mutex); - goto out_puts; + goto out_free; } rc = 0; crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); @@ -271,11 +226,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file) ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " "size: [0x%.16x]\n", inode, inode->i_ino, i_size_read(inode)); - ecryptfs_set_file_lower(file, lower_file); goto out; -out_puts: - mntput(lower_mnt); - dput(lower_dentry); +out_free: kmem_cache_free(ecryptfs_file_info_cache, ecryptfs_file_to_private(file)); out: @@ -295,19 +247,9 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td) static int ecryptfs_release(struct inode *inode, struct file *file) { - struct file *lower_file = ecryptfs_file_to_lower(file); - struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file); - struct inode *lower_inode = ecryptfs_inode_to_lower(inode); - int rc; - - if ((rc = ecryptfs_close_lower_file(lower_file))) { - printk(KERN_ERR "Error closing lower_file\n"); - goto out; - } - inode->i_blocks = lower_inode->i_blocks; - kmem_cache_free(ecryptfs_file_info_cache, file_info); -out: - return rc; + kmem_cache_free(ecryptfs_file_info_cache, + ecryptfs_file_to_private(file)); + return 0; } static int @@ -338,21 +280,6 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) return rc; } -static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos, - struct pipe_inode_info *pipe, size_t count, - unsigned int flags) -{ - struct file *lower_file = NULL; - int rc = -EINVAL; - - lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op && lower_file->f_op->splice_read) - rc = lower_file->f_op->splice_read(lower_file, ppos, pipe, - count, flags); - - return rc; -} - static int ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); @@ -365,7 +292,7 @@ const struct file_operations ecryptfs_dir_fops = { .release = ecryptfs_release, .fsync = ecryptfs_fsync, .fasync = ecryptfs_fasync, - .splice_read = ecryptfs_splice_read, + .splice_read = generic_file_splice_read, }; const struct file_operations ecryptfs_main_fops = { @@ -382,7 +309,7 @@ const struct file_operations ecryptfs_main_fops = { .release = ecryptfs_release, .fsync = ecryptfs_fsync, .fasync = ecryptfs_fasync, - .splice_read = ecryptfs_splice_read, + .splice_read = generic_file_splice_read, }; static int diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 131954b3fb9..5701f816faf 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -119,10 +119,23 @@ ecryptfs_do_create(struct inode *directory_inode, } rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, ecryptfs_dentry, mode, nd); - if (unlikely(rc)) { - ecryptfs_printk(KERN_ERR, - "Failure to create underlying file\n"); - goto out_lock; + if (rc) { + struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; + struct ecryptfs_inode_info *inode_info = + ecryptfs_inode_to_private(ecryptfs_inode); + + printk(KERN_WARNING "%s: Error creating underlying file; " + "rc = [%d]; checking for existing\n", __FUNCTION__, rc); + if (inode_info) { + mutex_lock(&inode_info->lower_file_mutex); + if (!inode_info->lower_file) { + mutex_unlock(&inode_info->lower_file_mutex); + printk(KERN_ERR "%s: Failure to set underlying " + "file; rc = [%d]\n", __FUNCTION__, rc); + goto out_lock; + } + mutex_unlock(&inode_info->lower_file_mutex); + } } rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, directory_inode->i_sb, 0); @@ -140,39 +153,30 @@ out: /** * grow_file - * @ecryptfs_dentry: the ecryptfs dentry - * @lower_file: The lower file - * @inode: The ecryptfs inode - * @lower_inode: The lower inode + * @ecryptfs_dentry: the eCryptfs dentry * * This is the code which will grow the file to its correct size. */ -static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file, - struct inode *inode, struct inode *lower_inode) +static int grow_file(struct dentry *ecryptfs_dentry) { - int rc = 0; + struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; struct file fake_file; struct ecryptfs_file_info tmp_file_info; + char zero_virt[] = { 0x00 }; + int rc = 0; memset(&fake_file, 0, sizeof(fake_file)); fake_file.f_path.dentry = ecryptfs_dentry; memset(&tmp_file_info, 0, sizeof(tmp_file_info)); ecryptfs_set_file_private(&fake_file, &tmp_file_info); - ecryptfs_set_file_lower(&fake_file, lower_file); - rc = ecryptfs_fill_zeros(&fake_file, 1); - if (rc) { - ecryptfs_inode_to_private(inode)->crypt_stat.flags |= - ECRYPTFS_SECURITY_WARNING; - ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros " - "in file; rc = [%d]\n", rc); - goto out; - } - i_size_write(inode, 0); - rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, - inode, ecryptfs_dentry, - ECRYPTFS_LOWER_I_MUTEX_NOT_HELD); - ecryptfs_inode_to_private(inode)->crypt_stat.flags |= ECRYPTFS_NEW_FILE; -out: + ecryptfs_set_file_lower( + &fake_file, + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file); + rc = ecryptfs_write(&fake_file, zero_virt, 0, 1); + i_size_write(ecryptfs_inode, 0); + rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); + ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |= + ECRYPTFS_NEW_FILE; return rc; } @@ -186,51 +190,31 @@ out: */ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) { + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; int rc = 0; - int lower_flags; - struct ecryptfs_crypt_stat *crypt_stat; - struct dentry *lower_dentry; - struct file *lower_file; - struct inode *inode, *lower_inode; - struct vfsmount *lower_mnt; - lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); - ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n", - lower_dentry->d_name.name); - inode = ecryptfs_dentry->d_inode; - crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; - lower_flags = ((O_CREAT | O_TRUNC) & O_ACCMODE) | O_RDWR; - lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); - /* Corresponding fput() at end of this function */ - if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, - lower_flags))) { - ecryptfs_printk(KERN_ERR, - "Error opening dentry; rc = [%i]\n", rc); - goto out; - } - lower_inode = lower_dentry->d_inode; if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); - goto out_fput; + goto out; } crypt_stat->flags |= ECRYPTFS_NEW_FILE; ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n"); rc = ecryptfs_new_file_context(ecryptfs_dentry); if (rc) { - ecryptfs_printk(KERN_DEBUG, "Error creating new file " - "context\n"); - goto out_fput; + ecryptfs_printk(KERN_ERR, "Error creating new file " + "context; rc = [%d]\n", rc); + goto out; } - rc = ecryptfs_write_metadata(ecryptfs_dentry, lower_file); + rc = ecryptfs_write_metadata(ecryptfs_dentry); if (rc) { - ecryptfs_printk(KERN_DEBUG, "Error writing headers\n"); - goto out_fput; + printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); + goto out; } - rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode); -out_fput: - if ((rc = ecryptfs_close_lower_file(lower_file))) - printk(KERN_ERR "Error closing lower_file\n"); + rc = grow_file(ecryptfs_dentry); + if (rc) + printk(KERN_ERR "Error growing file; rc = [%d]\n", rc); out: return rc; } @@ -252,6 +236,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, { int rc; + /* ecryptfs_do_create() calls ecryptfs_interpose(), which opens + * the crypt_stat->lower_file (persistent file) */ rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); if (unlikely(rc)) { ecryptfs_printk(KERN_WARNING, "Failed to create file in" @@ -374,8 +360,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) ecryptfs_set_default_sizes(crypt_stat); - rc = ecryptfs_read_and_validate_header_region(page_virt, lower_dentry, - nd->mnt); + rc = ecryptfs_read_and_validate_header_region(page_virt, + dentry->d_inode); if (rc) { rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry); if (rc) { @@ -392,7 +378,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_sb)->mount_crypt_stat; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - file_size = (crypt_stat->header_extent_size + file_size = ((crypt_stat->extent_size + * crypt_stat->num_header_extents_at_front) + i_size_read(lower_dentry->d_inode)); else file_size = i_size_read(lower_dentry->d_inode); @@ -722,8 +709,8 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, { loff_t lower_size; - lower_size = ( crypt_stat->header_extent_size - * crypt_stat->num_header_extents_at_front ); + lower_size = (crypt_stat->extent_size + * crypt_stat->num_header_extents_at_front); if (upper_size != 0) { loff_t num_extents; @@ -752,8 +739,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) int rc = 0; struct inode *inode = dentry->d_inode; struct dentry *lower_dentry; - struct vfsmount *lower_mnt; - struct file fake_ecryptfs_file, *lower_file = NULL; + struct file fake_ecryptfs_file; struct ecryptfs_crypt_stat *crypt_stat; loff_t i_size = i_size_read(inode); loff_t lower_size_before_truncate; @@ -776,62 +762,52 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) goto out; } lower_dentry = ecryptfs_dentry_to_lower(dentry); - /* This dget & mntget is released through fput at out_fput: */ - lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); - if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, - O_RDWR))) { - ecryptfs_printk(KERN_ERR, - "Error opening dentry; rc = [%i]\n", rc); - goto out_free; - } - ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file); + ecryptfs_set_file_lower( + &fake_ecryptfs_file, + ecryptfs_inode_to_private(dentry->d_inode)->lower_file); /* Switch on growing or shrinking file */ if (new_length > i_size) { - rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length); - if (rc) { - ecryptfs_printk(KERN_ERR, - "Problem with fill_zeros\n"); - goto out_fput; - } - i_size_write(inode, new_length); - rc = ecryptfs_write_inode_size_to_metadata( - lower_file, lower_dentry->d_inode, inode, dentry, - ECRYPTFS_LOWER_I_MUTEX_NOT_HELD); - if (rc) { - printk(KERN_ERR "Problem with " - "ecryptfs_write_inode_size_to_metadata; " - "rc = [%d]\n", rc); - goto out_fput; - } + char zero[] = { 0x00 }; + + /* Write a single 0 at the last position of the file; + * this triggers code that will fill in 0's throughout + * the intermediate portion of the previous end of the + * file and the new and of the file */ + rc = ecryptfs_write(&fake_ecryptfs_file, zero, + (new_length - 1), 1); } else { /* new_length < i_size_read(inode) */ - pgoff_t index = 0; - int end_pos_in_page = -1; + /* We're chopping off all the pages down do the page + * in which new_length is located. Fill in the end of + * that page from (new_length & ~PAGE_CACHE_MASK) to + * PAGE_CACHE_SIZE with zeros. */ + size_t num_zeros = (PAGE_CACHE_SIZE + - (new_length & ~PAGE_CACHE_MASK)); - if (new_length != 0) { - index = ((new_length - 1) >> PAGE_CACHE_SHIFT); - end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); - } - if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) { - if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file, - index, - (end_pos_in_page + 1), - ((PAGE_CACHE_SIZE - 1) - - end_pos_in_page)))) { + if (num_zeros) { + char *zeros_virt; + + zeros_virt = kzalloc(num_zeros, GFP_KERNEL); + if (!zeros_virt) { + rc = -ENOMEM; + goto out_free; + } + rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, + new_length, num_zeros); + kfree(zeros_virt); + if (rc) { printk(KERN_ERR "Error attempting to zero out " "the remainder of the end page on " "reducing truncate; rc = [%d]\n", rc); - goto out_fput; + goto out_free; } } vmtruncate(inode, new_length); - rc = ecryptfs_write_inode_size_to_metadata( - lower_file, lower_dentry->d_inode, inode, dentry, - ECRYPTFS_LOWER_I_MUTEX_NOT_HELD); + rc = ecryptfs_write_inode_size_to_metadata(inode); if (rc) { printk(KERN_ERR "Problem with " "ecryptfs_write_inode_size_to_metadata; " "rc = [%d]\n", rc); - goto out_fput; + goto out_free; } /* We are reducing the size of the ecryptfs file, and need to * know if we need to reduce the size of the lower file. */ @@ -843,13 +819,6 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) vmtruncate(lower_dentry->d_inode, lower_size_after_truncate); } - /* Update the access times */ - lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime - = CURRENT_TIME; - mark_inode_dirty_sync(inode); -out_fput: - if ((rc = ecryptfs_close_lower_file(lower_file))) - printk(KERN_ERR "Error closing lower_file\n"); out_free: if (ecryptfs_file_to_private(&fake_ecryptfs_file)) kmem_cache_free(ecryptfs_file_info_cache, @@ -909,23 +878,12 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) else if (S_ISREG(dentry->d_inode->i_mode) && (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) || !(crypt_stat->flags & ECRYPTFS_KEY_VALID))) { - struct vfsmount *lower_mnt; - struct file *lower_file = NULL; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; - int lower_flags; - - lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); - lower_flags = O_RDONLY; - if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, - lower_mnt, lower_flags))) { - printk(KERN_ERR - "Error opening lower file; rc = [%d]\n", rc); - mutex_unlock(&crypt_stat->cs_mutex); - goto out; - } + mount_crypt_stat = &ecryptfs_superblock_to_private( dentry->d_sb)->mount_crypt_stat; - if ((rc = ecryptfs_read_metadata(dentry, lower_file))) { + rc = ecryptfs_read_metadata(dentry); + if (rc) { if (!(mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { rc = -EIO; @@ -935,16 +893,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) "enabled; returning -EIO\n"); mutex_unlock(&crypt_stat->cs_mutex); - fput(lower_file); goto out; } rc = 0; crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); mutex_unlock(&crypt_stat->cs_mutex); - fput(lower_file); goto out; } - fput(lower_file); } mutex_unlock(&crypt_stat->cs_mutex); if (ia->ia_valid & ATTR_SIZE) { @@ -986,13 +941,11 @@ out: } ssize_t -ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size) +ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, + void *value, size_t size) { int rc = 0; - struct dentry *lower_dentry; - lower_dentry = ecryptfs_dentry_to_lower(dentry); if (!lower_dentry->d_inode->i_op->getxattr) { rc = -ENOSYS; goto out; @@ -1005,6 +958,14 @@ out: return rc; } +ssize_t +ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, + size_t size) +{ + return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), name, + value, size); +} + static ssize_t ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size) { diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index b550dea8eee..89d9710dd63 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -39,7 +39,7 @@ * determine the type of error, make appropriate log entries, and * return an error code. */ -int process_request_key_err(long err_code) +static int process_request_key_err(long err_code) { int rc = 0; @@ -71,7 +71,7 @@ int process_request_key_err(long err_code) * address; zero on error * @length_size: The number of bytes occupied by the encoded length * - * Returns Zero on success + * Returns zero on success; non-zero on error */ static int parse_packet_length(unsigned char *data, size_t *size, size_t *length_size) @@ -106,11 +106,11 @@ out: /** * write_packet_length - * @dest: The byte array target into which to write the - * length. Must have at least 5 bytes allocated. + * @dest: The byte array target into which to write the length. Must + * have at least 5 bytes allocated. * @size: The length to write. - * @packet_size_length: The number of bytes used to encode the - * packet length is written to this address. + * @packet_size_length: The number of bytes used to encode the packet + * length is written to this address. * * Returns zero on success; non-zero on error. */ @@ -396,26 +396,53 @@ out: return rc; } +static int +ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok) +{ + int rc = 0; + + (*sig) = NULL; + switch (auth_tok->token_type) { + case ECRYPTFS_PASSWORD: + (*sig) = auth_tok->token.password.signature; + break; + case ECRYPTFS_PRIVATE_KEY: + (*sig) = auth_tok->token.private_key.signature; + break; + default: + printk(KERN_ERR "Cannot get sig for auth_tok of type [%d]\n", + auth_tok->token_type); + rc = -EINVAL; + } + return rc; +} + /** - * decrypt_pki_encrypted_session_key - Decrypt the session key with - * the given auth_tok. + * decrypt_pki_encrypted_session_key - Decrypt the session key with the given auth_tok. + * @auth_tok: The key authentication token used to decrypt the session key + * @crypt_stat: The cryptographic context * - * Returns Zero on success; non-zero error otherwise. + * Returns zero on success; non-zero error otherwise. */ -static int decrypt_pki_encrypted_session_key( - struct ecryptfs_mount_crypt_stat *mount_crypt_stat, - struct ecryptfs_auth_tok *auth_tok, - struct ecryptfs_crypt_stat *crypt_stat) +static int +decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, + struct ecryptfs_crypt_stat *crypt_stat) { u16 cipher_code = 0; struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; + char *auth_tok_sig; char *netlink_message; size_t netlink_message_length; int rc; - rc = write_tag_64_packet(mount_crypt_stat->global_auth_tok_sig, - &(auth_tok->session_key), + rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); + if (rc) { + printk(KERN_ERR "Unrecognized auth tok type: [%d]\n", + auth_tok->token_type); + goto out; + } + rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), &netlink_message, &netlink_message_length); if (rc) { ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); @@ -465,40 +492,33 @@ out: static void wipe_auth_tok_list(struct list_head *auth_tok_list_head) { - struct list_head *walker; struct ecryptfs_auth_tok_list_item *auth_tok_list_item; + struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp; - walker = auth_tok_list_head->next; - while (walker != auth_tok_list_head) { - auth_tok_list_item = - list_entry(walker, struct ecryptfs_auth_tok_list_item, - list); - walker = auth_tok_list_item->list.next; - memset(auth_tok_list_item, 0, - sizeof(struct ecryptfs_auth_tok_list_item)); + list_for_each_entry_safe(auth_tok_list_item, auth_tok_list_item_tmp, + auth_tok_list_head, list) { + list_del(&auth_tok_list_item->list); kmem_cache_free(ecryptfs_auth_tok_list_item_cache, auth_tok_list_item); } - auth_tok_list_head->next = NULL; } struct kmem_cache *ecryptfs_auth_tok_list_item_cache; - /** * parse_tag_1_packet - * @crypt_stat: The cryptographic context to modify based on packet - * contents. + * @crypt_stat: The cryptographic context to modify based on packet contents * @data: The raw bytes of the packet. * @auth_tok_list: eCryptfs parses packets into authentication tokens; - * a new authentication token will be placed at the end - * of this list for this packet. + * a new authentication token will be placed at the + * end of this list for this packet. * @new_auth_tok: Pointer to a pointer to memory that this function * allocates; sets the memory address of the pointer to * NULL on error. This object is added to the * auth_tok_list. * @packet_size: This function writes the size of the parsed packet * into this memory location; zero on error. + * @max_packet_size: The maximum allowable packet size * * Returns zero on success; non-zero on error. */ @@ -515,72 +535,65 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, (*packet_size) = 0; (*new_auth_tok) = NULL; - - /* we check that: - * one byte for the Tag 1 ID flag - * two bytes for the body size - * do not exceed the maximum_packet_size + /** + * This format is inspired by OpenPGP; see RFC 2440 + * packet tag 1 + * + * Tag 1 identifier (1 byte) + * Max Tag 1 packet size (max 3 bytes) + * Version (1 byte) + * Key identifier (8 bytes; ECRYPTFS_SIG_SIZE) + * Cipher identifier (1 byte) + * Encrypted key size (arbitrary) + * + * 12 bytes minimum packet size */ - if (unlikely((*packet_size) + 3 > max_packet_size)) { - ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + if (unlikely(max_packet_size < 12)) { + printk(KERN_ERR "Invalid max packet size; must be >=12\n"); rc = -EINVAL; goto out; } - /* check for Tag 1 identifier - one byte */ if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) { - ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n", - ECRYPTFS_TAG_1_PACKET_TYPE); + printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n", + ECRYPTFS_TAG_1_PACKET_TYPE); rc = -EINVAL; goto out; } /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ auth_tok_list_item = - kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, - GFP_KERNEL); + kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, + GFP_KERNEL); if (!auth_tok_list_item) { - ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); + printk(KERN_ERR "Unable to allocate memory\n"); rc = -ENOMEM; goto out; } - memset(auth_tok_list_item, 0, - sizeof(struct ecryptfs_auth_tok_list_item)); (*new_auth_tok) = &auth_tok_list_item->auth_tok; - /* check for body size - one to two bytes - * - * ***** TAG 1 Packet Format ***** - * | version number | 1 byte | - * | key ID | 8 bytes | - * | public key algorithm | 1 byte | - * | encrypted session key | arbitrary | - */ rc = parse_packet_length(&data[(*packet_size)], &body_size, &length_size); if (rc) { - ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " - "rc = [%d]\n", rc); + printk(KERN_WARNING "Error parsing packet length; " + "rc = [%d]\n", rc); goto out_free; } - if (unlikely(body_size < (0x02 + ECRYPTFS_SIG_SIZE))) { - ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", - body_size); + if (unlikely(body_size < (ECRYPTFS_SIG_SIZE + 2))) { + printk(KERN_WARNING "Invalid body size ([%td])\n", body_size); rc = -EINVAL; goto out_free; } (*packet_size) += length_size; if (unlikely((*packet_size) + body_size > max_packet_size)) { - ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + printk(KERN_WARNING "Packet size exceeds max\n"); rc = -EINVAL; goto out_free; } - /* Version 3 (from RFC2440) - one byte */ if (unlikely(data[(*packet_size)++] != 0x03)) { - ecryptfs_printk(KERN_DEBUG, "Unknown version number " - "[%d]\n", data[(*packet_size) - 1]); + printk(KERN_WARNING "Unknown version number [%d]\n", + data[(*packet_size) - 1]); rc = -EINVAL; goto out_free; } - /* Read Signature */ ecryptfs_to_hex((*new_auth_tok)->token.private_key.signature, &data[(*packet_size)], ECRYPTFS_SIG_SIZE); *packet_size += ECRYPTFS_SIG_SIZE; @@ -588,27 +601,23 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, * know which public key encryption algorithm was used */ (*packet_size)++; (*new_auth_tok)->session_key.encrypted_key_size = - body_size - (0x02 + ECRYPTFS_SIG_SIZE); + body_size - (ECRYPTFS_SIG_SIZE + 2); if ((*new_auth_tok)->session_key.encrypted_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { - ecryptfs_printk(KERN_ERR, "Tag 1 packet contains key larger " - "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); + printk(KERN_WARNING "Tag 1 packet contains key larger " + "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); rc = -EINVAL; goto out; } - ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n", - (*new_auth_tok)->session_key.encrypted_key_size); memcpy((*new_auth_tok)->session_key.encrypted_key, - &data[(*packet_size)], (body_size - 0x02 - ECRYPTFS_SIG_SIZE)); + &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); (*packet_size) += (*new_auth_tok)->session_key.encrypted_key_size; (*new_auth_tok)->session_key.flags &= ~ECRYPTFS_CONTAINS_DECRYPTED_KEY; (*new_auth_tok)->session_key.flags |= ECRYPTFS_CONTAINS_ENCRYPTED_KEY; (*new_auth_tok)->token_type = ECRYPTFS_PRIVATE_KEY; - (*new_auth_tok)->flags |= ECRYPTFS_PRIVATE_KEY; - /* TODO: Why are we setting this flag here? Don't we want the - * userspace to decrypt the session key? */ + (*new_auth_tok)->flags = 0; (*new_auth_tok)->session_key.flags &= ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT); (*new_auth_tok)->session_key.flags &= @@ -658,22 +667,30 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, (*packet_size) = 0; (*new_auth_tok) = NULL; - - /* we check that: - * one byte for the Tag 3 ID flag - * two bytes for the body size - * do not exceed the maximum_packet_size + /** + *This format is inspired by OpenPGP; see RFC 2440 + * packet tag 3 + * + * Tag 3 identifier (1 byte) + * Max Tag 3 packet size (max 3 bytes) + * Version (1 byte) + * Cipher code (1 byte) + * S2K specifier (1 byte) + * Hash identifier (1 byte) + * Salt (ECRYPTFS_SALT_SIZE) + * Hash iterations (1 byte) + * Encrypted key (arbitrary) + * + * (ECRYPTFS_SALT_SIZE + 7) minimum packet size */ - if (unlikely((*packet_size) + 3 > max_packet_size)) { - ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + if (max_packet_size < (ECRYPTFS_SALT_SIZE + 7)) { + printk(KERN_ERR "Max packet size too large\n"); rc = -EINVAL; goto out; } - - /* check for Tag 3 identifyer - one byte */ if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) { - ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n", - ECRYPTFS_TAG_3_PACKET_TYPE); + printk(KERN_ERR "First byte != 0x%.2x; invalid packet\n", + ECRYPTFS_TAG_3_PACKET_TYPE); rc = -EINVAL; goto out; } @@ -682,56 +699,37 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, auth_tok_list_item = kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL); if (!auth_tok_list_item) { - ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); + printk(KERN_ERR "Unable to allocate memory\n"); rc = -ENOMEM; goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - - /* check for body size - one to two bytes */ rc = parse_packet_length(&data[(*packet_size)], &body_size, &length_size); if (rc) { - ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " - "rc = [%d]\n", rc); + printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", + rc); goto out_free; } - if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) { - ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", - body_size); + if (unlikely(body_size < (ECRYPTFS_SALT_SIZE + 5))) { + printk(KERN_WARNING "Invalid body size ([%td])\n", body_size); rc = -EINVAL; goto out_free; } (*packet_size) += length_size; - - /* now we know the length of the remainting Tag 3 packet size: - * 5 fix bytes for: version string, cipher, S2K ID, hash algo, - * number of hash iterations - * ECRYPTFS_SALT_SIZE bytes for salt - * body_size bytes minus the stuff above is the encrypted key size - */ if (unlikely((*packet_size) + body_size > max_packet_size)) { - ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + printk(KERN_ERR "Packet size exceeds max\n"); rc = -EINVAL; goto out_free; } - - /* There are 5 characters of additional information in the - * packet */ (*new_auth_tok)->session_key.encrypted_key_size = - body_size - (0x05 + ECRYPTFS_SALT_SIZE); - ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n", - (*new_auth_tok)->session_key.encrypted_key_size); - - /* Version 4 (from RFC2440) - one byte */ + (body_size - (ECRYPTFS_SALT_SIZE + 5)); if (unlikely(data[(*packet_size)++] != 0x04)) { - ecryptfs_printk(KERN_DEBUG, "Unknown version number " - "[%d]\n", data[(*packet_size) - 1]); + printk(KERN_WARNING "Unknown version number [%d]\n", + data[(*packet_size) - 1]); rc = -EINVAL; goto out_free; } - - /* cipher - one byte */ ecryptfs_cipher_code_to_string(crypt_stat->cipher, (u16)data[(*packet_size)]); /* A little extra work to differentiate among the AES key @@ -745,33 +743,26 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, (*new_auth_tok)->session_key.encrypted_key_size; } ecryptfs_init_crypt_ctx(crypt_stat); - /* S2K identifier 3 (from RFC2440) */ if (unlikely(data[(*packet_size)++] != 0x03)) { - ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently " - "supported\n"); + printk(KERN_WARNING "Only S2K ID 3 is currently supported\n"); rc = -ENOSYS; goto out_free; } - /* TODO: finish the hash mapping */ - /* hash algorithm - one byte */ switch (data[(*packet_size)++]) { case 0x01: /* See RFC2440 for these numbers and their mappings */ /* Choose MD5 */ - /* salt - ECRYPTFS_SALT_SIZE bytes */ memcpy((*new_auth_tok)->token.password.salt, &data[(*packet_size)], ECRYPTFS_SALT_SIZE); (*packet_size) += ECRYPTFS_SALT_SIZE; - /* This conversion was taken straight from RFC2440 */ - /* number of hash iterations - one byte */ (*new_auth_tok)->token.password.hash_iterations = ((u32) 16 + (data[(*packet_size)] & 15)) << ((data[(*packet_size)] >> 4) + 6); (*packet_size)++; - - /* encrypted session key - - * (body_size-5-ECRYPTFS_SALT_SIZE) bytes */ + /* Friendly reminder: + * (*new_auth_tok)->session_key.encrypted_key_size = + * (body_size - (ECRYPTFS_SALT_SIZE + 5)); */ memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (*new_auth_tok)->session_key.encrypted_key_size); @@ -781,7 +772,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, ~ECRYPTFS_CONTAINS_DECRYPTED_KEY; (*new_auth_tok)->session_key.flags |= ECRYPTFS_CONTAINS_ENCRYPTED_KEY; - (*new_auth_tok)->token.password.hash_algo = 0x01; + (*new_auth_tok)->token.password.hash_algo = 0x01; /* MD5 */ break; default: ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: " @@ -837,82 +828,61 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents, (*packet_size) = 0; (*tag_11_contents_size) = 0; - - /* check that: - * one byte for the Tag 11 ID flag - * two bytes for the Tag 11 length - * do not exceed the maximum_packet_size + /* This format is inspired by OpenPGP; see RFC 2440 + * packet tag 11 + * + * Tag 11 identifier (1 byte) + * Max Tag 11 packet size (max 3 bytes) + * Binary format specifier (1 byte) + * Filename length (1 byte) + * Filename ("_CONSOLE") (8 bytes) + * Modification date (4 bytes) + * Literal data (arbitrary) + * + * We need at least 16 bytes of data for the packet to even be + * valid. */ - if (unlikely((*packet_size) + 3 > max_packet_size)) { - ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + if (max_packet_size < 16) { + printk(KERN_ERR "Maximum packet size too small\n"); rc = -EINVAL; goto out; } - - /* check for Tag 11 identifyer - one byte */ if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) { - ecryptfs_printk(KERN_WARNING, - "Invalid tag 11 packet format\n"); + printk(KERN_WARNING "Invalid tag 11 packet format\n"); rc = -EINVAL; goto out; } - - /* get Tag 11 content length - one or two bytes */ rc = parse_packet_length(&data[(*packet_size)], &body_size, &length_size); if (rc) { - ecryptfs_printk(KERN_WARNING, - "Invalid tag 11 packet format\n"); + printk(KERN_WARNING "Invalid tag 11 packet format\n"); goto out; } - (*packet_size) += length_size; - - if (body_size < 13) { - ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n", - body_size); + if (body_size < 14) { + printk(KERN_WARNING "Invalid body size ([%td])\n", body_size); rc = -EINVAL; goto out; } - /* We have 13 bytes of surrounding packet values */ - (*tag_11_contents_size) = (body_size - 13); - - /* now we know the length of the remainting Tag 11 packet size: - * 14 fix bytes for: special flag one, special flag two, - * 12 skipped bytes - * body_size bytes minus the stuff above is the Tag 11 content - */ - /* FIXME why is the body size one byte smaller than the actual - * size of the body? - * this seems to be an error here as well as in - * write_tag_11_packet() */ + (*packet_size) += length_size; + (*tag_11_contents_size) = (body_size - 14); if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) { - ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n"); + printk(KERN_ERR "Packet size exceeds max\n"); rc = -EINVAL; goto out; } - - /* special flag one - one byte */ if (data[(*packet_size)++] != 0x62) { - ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); + printk(KERN_WARNING "Unrecognizable packet\n"); rc = -EINVAL; goto out; } - - /* special flag two - one byte */ if (data[(*packet_size)++] != 0x08) { - ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n"); + printk(KERN_WARNING "Unrecognizable packet\n"); rc = -EINVAL; goto out; } - - /* skip the next 12 bytes */ - (*packet_size) += 12; /* We don't care about the filename or - * the timestamp */ - - /* get the Tag 11 contents - tag_11_contents_size bytes */ + (*packet_size) += 12; /* Ignore filename and modification date */ memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size)); (*packet_size) += (*tag_11_contents_size); - out: if (rc) { (*packet_size) = 0; @@ -921,130 +891,229 @@ out: return rc; } +static int +ecryptfs_find_global_auth_tok_for_sig( + struct ecryptfs_global_auth_tok **global_auth_tok, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig) +{ + struct ecryptfs_global_auth_tok *walker; + int rc = 0; + + (*global_auth_tok) = NULL; + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(walker, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) { + (*global_auth_tok) = walker; + goto out; + } + } + rc = -EINVAL; +out: + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + return rc; +} + /** - * decrypt_session_key - Decrypt the session key with the given auth_tok. + * ecryptfs_verify_version + * @version: The version number to confirm * - * Returns Zero on success; non-zero error otherwise. + * Returns zero on good version; non-zero otherwise */ -static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, - struct ecryptfs_crypt_stat *crypt_stat) +static int ecryptfs_verify_version(u16 version) { - struct ecryptfs_password *password_s_ptr; - struct scatterlist src_sg[2], dst_sg[2]; - struct mutex *tfm_mutex = NULL; - char *encrypted_session_key; - char *session_key; + int rc = 0; + unsigned char major; + unsigned char minor; + + major = ((version >> 8) & 0xFF); + minor = (version & 0xFF); + if (major != ECRYPTFS_VERSION_MAJOR) { + ecryptfs_printk(KERN_ERR, "Major version number mismatch. " + "Expected [%d]; got [%d]\n", + ECRYPTFS_VERSION_MAJOR, major); + rc = -EINVAL; + goto out; + } + if (minor != ECRYPTFS_VERSION_MINOR) { + ecryptfs_printk(KERN_ERR, "Minor version number mismatch. " + "Expected [%d]; got [%d]\n", + ECRYPTFS_VERSION_MINOR, minor); + rc = -EINVAL; + goto out; + } +out: + return rc; +} + +int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, + struct ecryptfs_auth_tok **auth_tok, + char *sig) +{ + int rc = 0; + + (*auth_tok_key) = request_key(&key_type_user, sig, NULL); + if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { + printk(KERN_ERR "Could not find key with description: [%s]\n", + sig); + process_request_key_err(PTR_ERR(*auth_tok_key)); + rc = -EINVAL; + goto out; + } + (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); + if (ecryptfs_verify_version((*auth_tok)->version)) { + printk(KERN_ERR + "Data structure version mismatch. " + "Userspace tools must match eCryptfs " + "kernel module with major version [%d] " + "and minor version [%d]\n", + ECRYPTFS_VERSION_MAJOR, + ECRYPTFS_VERSION_MINOR); + rc = -EINVAL; + goto out; + } + if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD + && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) { + printk(KERN_ERR "Invalid auth_tok structure " + "returned from key query\n"); + rc = -EINVAL; + goto out; + } +out: + return rc; +} + +/** + * ecryptfs_find_auth_tok_for_sig + * @auth_tok: Set to the matching auth_tok; NULL if not found + * @crypt_stat: inode crypt_stat crypto context + * @sig: Sig of auth_tok to find + * + * For now, this function simply looks at the registered auth_tok's + * linked off the mount_crypt_stat, so all the auth_toks that can be + * used must be registered at mount time. This function could + * potentially try a lot harder to find auth_tok's (e.g., by calling + * out to ecryptfsd to dynamically retrieve an auth_tok object) so + * that static registration of auth_tok's will no longer be necessary. + * + * Returns zero on no error; non-zero on error + */ +static int +ecryptfs_find_auth_tok_for_sig( + struct ecryptfs_auth_tok **auth_tok, + struct ecryptfs_crypt_stat *crypt_stat, char *sig) +{ + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + crypt_stat->mount_crypt_stat; + struct ecryptfs_global_auth_tok *global_auth_tok; + int rc = 0; + + (*auth_tok) = NULL; + if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, + mount_crypt_stat, sig)) { + struct key *auth_tok_key; + + rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok, + sig); + } else + (*auth_tok) = global_auth_tok->global_auth_tok; + return rc; +} + +/** + * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok. + * @auth_tok: The passphrase authentication token to use to encrypt the FEK + * @crypt_stat: The cryptographic context + * + * Returns zero on success; non-zero error otherwise + */ +static int +decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, + struct ecryptfs_crypt_stat *crypt_stat) +{ + struct scatterlist dst_sg; + struct scatterlist src_sg; + struct mutex *tfm_mutex; struct blkcipher_desc desc = { .flags = CRYPTO_TFM_REQ_MAY_SLEEP }; int rc = 0; - password_s_ptr = &auth_tok->token.password; - if (password_s_ptr->flags & ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) - ecryptfs_printk(KERN_DEBUG, "Session key encryption key " - "set; skipping key generation\n"); - ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])" - ":\n", - password_s_ptr->session_key_encryption_key_bytes); - if (ecryptfs_verbosity > 0) - ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key, - password_s_ptr-> - session_key_encryption_key_bytes); - if (!strcmp(crypt_stat->cipher, - crypt_stat->mount_crypt_stat->global_default_cipher_name) - && crypt_stat->mount_crypt_stat->global_key_tfm) { - desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm; - tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; - } else { - char *full_alg_name; - - rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, - crypt_stat->cipher, - "ecb"); - if (rc) - goto out; - desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0, - CRYPTO_ALG_ASYNC); - kfree(full_alg_name); - if (IS_ERR(desc.tfm)) { - rc = PTR_ERR(desc.tfm); - printk(KERN_ERR "Error allocating crypto context; " - "rc = [%d]\n", rc); - goto out; - } - crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk( + KERN_DEBUG, "Session key encryption key (size [%d]):\n", + auth_tok->token.password.session_key_encryption_key_bytes); + ecryptfs_dump_hex( + auth_tok->token.password.session_key_encryption_key, + auth_tok->token.password.session_key_encryption_key_bytes); + } + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex, + crypt_stat->cipher); + if (unlikely(rc)) { + printk(KERN_ERR "Internal error whilst attempting to get " + "tfm and mutex for cipher name [%s]; rc = [%d]\n", + crypt_stat->cipher, rc); + goto out; } - if (tfm_mutex) - mutex_lock(tfm_mutex); - rc = crypto_blkcipher_setkey(desc.tfm, - password_s_ptr->session_key_encryption_key, - crypt_stat->key_size); - if (rc < 0) { + rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key, + auth_tok->session_key.encrypted_key_size, + &src_sg, 1); + if (rc != 1) { + printk(KERN_ERR "Internal error whilst attempting to convert " + "auth_tok->session_key.encrypted_key to scatterlist; " + "expected rc = 1; got rc = [%d]. " + "auth_tok->session_key.encrypted_key_size = [%d]\n", rc, + auth_tok->session_key.encrypted_key_size); + goto out; + } + auth_tok->session_key.decrypted_key_size = + auth_tok->session_key.encrypted_key_size; + rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key, + auth_tok->session_key.decrypted_key_size, + &dst_sg, 1); + if (rc != 1) { + printk(KERN_ERR "Internal error whilst attempting to convert " + "auth_tok->session_key.decrypted_key to scatterlist; " + "expected rc = 1; got rc = [%d]\n", rc); + goto out; + } + mutex_lock(tfm_mutex); + rc = crypto_blkcipher_setkey( + desc.tfm, auth_tok->token.password.session_key_encryption_key, + crypt_stat->key_size); + if (unlikely(rc < 0)) { + mutex_unlock(tfm_mutex); printk(KERN_ERR "Error setting key for crypto context\n"); rc = -EINVAL; - goto out_free_tfm; - } - /* TODO: virt_to_scatterlist */ - encrypted_session_key = (char *)__get_free_page(GFP_KERNEL); - if (!encrypted_session_key) { - ecryptfs_printk(KERN_ERR, "Out of memory\n"); - rc = -ENOMEM; - goto out_free_tfm; + goto out; } - session_key = (char *)__get_free_page(GFP_KERNEL); - if (!session_key) { - kfree(encrypted_session_key); - ecryptfs_printk(KERN_ERR, "Out of memory\n"); - rc = -ENOMEM; - goto out_free_tfm; - } - memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key, - auth_tok->session_key.encrypted_key_size); - src_sg[0].page = virt_to_page(encrypted_session_key); - src_sg[0].offset = 0; - BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE); - src_sg[0].length = auth_tok->session_key.encrypted_key_size; - dst_sg[0].page = virt_to_page(session_key); - dst_sg[0].offset = 0; - auth_tok->session_key.decrypted_key_size = - auth_tok->session_key.encrypted_key_size; - dst_sg[0].length = auth_tok->session_key.encrypted_key_size; - rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg, + rc = crypto_blkcipher_decrypt(&desc, &dst_sg, &src_sg, auth_tok->session_key.encrypted_key_size); - if (rc) { + mutex_unlock(tfm_mutex); + if (unlikely(rc)) { printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc); - goto out_free_memory; + goto out; } - auth_tok->session_key.decrypted_key_size = - auth_tok->session_key.encrypted_key_size; - memcpy(auth_tok->session_key.decrypted_key, session_key, - auth_tok->session_key.decrypted_key_size); auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, auth_tok->session_key.decrypted_key_size); crypt_stat->flags |= ECRYPTFS_KEY_VALID; - ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); - if (ecryptfs_verbosity > 0) + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n", + crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); -out_free_memory: - memset(encrypted_session_key, 0, PAGE_CACHE_SIZE); - free_page((unsigned long)encrypted_session_key); - memset(session_key, 0, PAGE_CACHE_SIZE); - free_page((unsigned long)session_key); -out_free_tfm: - if (tfm_mutex) - mutex_unlock(tfm_mutex); - else - crypto_free_blkcipher(desc.tfm); + } out: return rc; } /** * ecryptfs_parse_packet_set - * @dest: The header page in memory - * @version: Version of file format, to guide parsing behavior + * @crypt_stat: The cryptographic context + * @src: Virtual address of region of memory containing the packets + * @ecryptfs_dentry: The eCryptfs dentry associated with the packet set * * Get crypt_stat to have the file's session key if the requisite key * is available to decrypt the session key. @@ -1058,25 +1127,22 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, struct dentry *ecryptfs_dentry) { size_t i = 0; - size_t found_auth_tok = 0; + size_t found_auth_tok; size_t next_packet_is_auth_tok_packet; - char sig[ECRYPTFS_SIG_SIZE_HEX]; struct list_head auth_tok_list; - struct list_head *walker; - struct ecryptfs_auth_tok *chosen_auth_tok = NULL; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat = - &ecryptfs_superblock_to_private( - ecryptfs_dentry->d_sb)->mount_crypt_stat; - struct ecryptfs_auth_tok *candidate_auth_tok = NULL; + struct ecryptfs_auth_tok *matching_auth_tok; + struct ecryptfs_auth_tok *candidate_auth_tok; + char *candidate_auth_tok_sig; size_t packet_size; struct ecryptfs_auth_tok *new_auth_tok; unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE]; + struct ecryptfs_auth_tok_list_item *auth_tok_list_item; size_t tag_11_contents_size; size_t tag_11_packet_size; int rc = 0; INIT_LIST_HEAD(&auth_tok_list); - /* Parse the header to find as many packets as we can, these will be + /* Parse the header to find as many packets as we can; these will be * added the our &auth_tok_list */ next_packet_is_auth_tok_packet = 1; while (next_packet_is_auth_tok_packet) { @@ -1155,73 +1221,85 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, } } if (list_empty(&auth_tok_list)) { - rc = -EINVAL; /* Do not support non-encrypted files in - * the 0.1 release */ + printk(KERN_ERR "The lower file appears to be a non-encrypted " + "eCryptfs file; this is not supported in this version " + "of the eCryptfs kernel module\n"); + rc = -EINVAL; goto out; } - /* If we have a global auth tok, then we should try to use - * it */ - if (mount_crypt_stat->global_auth_tok) { - memcpy(sig, mount_crypt_stat->global_auth_tok_sig, - ECRYPTFS_SIG_SIZE_HEX); - chosen_auth_tok = mount_crypt_stat->global_auth_tok; - } else - BUG(); /* We should always have a global auth tok in - * the 0.1 release */ - /* Scan list to see if our chosen_auth_tok works */ - list_for_each(walker, &auth_tok_list) { - struct ecryptfs_auth_tok_list_item *auth_tok_list_item; - auth_tok_list_item = - list_entry(walker, struct ecryptfs_auth_tok_list_item, - list); + /* auth_tok_list contains the set of authentication tokens + * parsed from the metadata. We need to find a matching + * authentication token that has the secret component(s) + * necessary to decrypt the EFEK in the auth_tok parsed from + * the metadata. There may be several potential matches, but + * just one will be sufficient to decrypt to get the FEK. */ +find_next_matching_auth_tok: + found_auth_tok = 0; + list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) { candidate_auth_tok = &auth_tok_list_item->auth_tok; if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Considering cadidate auth tok:\n"); ecryptfs_dump_auth_tok(candidate_auth_tok); } - /* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */ - if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD - && !strncmp(candidate_auth_tok->token.password.signature, - sig, ECRYPTFS_SIG_SIZE_HEX)) { - found_auth_tok = 1; - goto leave_list; - /* TODO: Transfer the common salt into the - * crypt_stat salt */ - } else if ((candidate_auth_tok->token_type - == ECRYPTFS_PRIVATE_KEY) - && !strncmp(candidate_auth_tok->token.private_key.signature, - sig, ECRYPTFS_SIG_SIZE_HEX)) { + rc = ecryptfs_get_auth_tok_sig(&candidate_auth_tok_sig, + candidate_auth_tok); + if (rc) { + printk(KERN_ERR + "Unrecognized candidate auth tok type: [%d]\n", + candidate_auth_tok->token_type); + rc = -EINVAL; + goto out_wipe_list; + } + ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, crypt_stat, + candidate_auth_tok_sig); + if (matching_auth_tok) { found_auth_tok = 1; - goto leave_list; + goto found_matching_auth_tok; } } if (!found_auth_tok) { - ecryptfs_printk(KERN_ERR, "Could not find authentication " - "token on temporary list for sig [%.*s]\n", - ECRYPTFS_SIG_SIZE_HEX, sig); + ecryptfs_printk(KERN_ERR, "Could not find a usable " + "authentication token\n"); rc = -EIO; goto out_wipe_list; } -leave_list: - rc = -ENOTSUPP; +found_matching_auth_tok: if (candidate_auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { memcpy(&(candidate_auth_tok->token.private_key), - &(chosen_auth_tok->token.private_key), + &(matching_auth_tok->token.private_key), sizeof(struct ecryptfs_private_key)); - rc = decrypt_pki_encrypted_session_key(mount_crypt_stat, - candidate_auth_tok, + rc = decrypt_pki_encrypted_session_key(candidate_auth_tok, crypt_stat); } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) { memcpy(&(candidate_auth_tok->token.password), - &(chosen_auth_tok->token.password), + &(matching_auth_tok->token.password), sizeof(struct ecryptfs_password)); - rc = decrypt_session_key(candidate_auth_tok, crypt_stat); + rc = decrypt_passphrase_encrypted_session_key( + candidate_auth_tok, crypt_stat); } if (rc) { - ecryptfs_printk(KERN_ERR, "Error decrypting the " - "session key; rc = [%d]\n", rc); - goto out_wipe_list; + struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp; + + ecryptfs_printk(KERN_WARNING, "Error decrypting the " + "session key for authentication token with sig " + "[%.*s]; rc = [%d]. Removing auth tok " + "candidate from the list and searching for " + "the next match.\n", candidate_auth_tok_sig, + ECRYPTFS_SIG_SIZE_HEX, rc); + list_for_each_entry_safe(auth_tok_list_item, + auth_tok_list_item_tmp, + &auth_tok_list, list) { + if (candidate_auth_tok + == &auth_tok_list_item->auth_tok) { + list_del(&auth_tok_list_item->list); + kmem_cache_free( + ecryptfs_auth_tok_list_item_cache, + auth_tok_list_item); + goto find_next_matching_auth_tok; + } + } + BUG(); } rc = ecryptfs_compute_root_iv(crypt_stat); if (rc) { @@ -1240,6 +1318,7 @@ out_wipe_list: out: return rc; } + static int pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat, @@ -1284,22 +1363,25 @@ out: /** * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet * @dest: Buffer into which to write the packet - * @max: Maximum number of bytes that can be writtn + * @remaining_bytes: Maximum number of bytes that can be writtn + * @auth_tok: The authentication token used for generating the tag 1 packet + * @crypt_stat: The cryptographic context + * @key_rec: The key record struct for the tag 1 packet * @packet_size: This function will write the number of bytes that end * up constituting the packet; set to zero on error * * Returns zero on success; non-zero on error. */ static int -write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, +write_tag_1_packet(char *dest, size_t *remaining_bytes, + struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat, - struct ecryptfs_mount_crypt_stat *mount_crypt_stat, struct ecryptfs_key_record *key_rec, size_t *packet_size) { size_t i; size_t encrypted_session_key_valid = 0; - size_t key_rec_size; size_t packet_size_length; + size_t max_packet_size; int rc = 0; (*packet_size) = 0; @@ -1329,37 +1411,23 @@ write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size); } encrypted_session_key_set: - /* Now we have a valid key_rec. Append it to the - * key_rec set. */ - key_rec_size = (sizeof(struct ecryptfs_key_record) - - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES - + (key_rec->enc_key_size)); - /* TODO: Include a packet size limit as a parameter to this - * function once we have multi-packet headers (for versions - * later than 0.1 */ - if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) { - ecryptfs_printk(KERN_ERR, "Keyset too large\n"); - rc = -EINVAL; - goto out; - } - /* ***** TAG 1 Packet Format ***** - * | version number | 1 byte | - * | key ID | 8 bytes | - * | public key algorithm | 1 byte | - * | encrypted session key | arbitrary | - */ - if ((0x02 + ECRYPTFS_SIG_SIZE + key_rec->enc_key_size) >= max) { - ecryptfs_printk(KERN_ERR, - "Authentication token is too large\n"); + /* This format is inspired by OpenPGP; see RFC 2440 + * packet tag 1 */ + max_packet_size = (1 /* Tag 1 identifier */ + + 3 /* Max Tag 1 packet size */ + + 1 /* Version */ + + ECRYPTFS_SIG_SIZE /* Key identifier */ + + 1 /* Cipher identifier */ + + key_rec->enc_key_size); /* Encrypted key size */ + if (max_packet_size > (*remaining_bytes)) { + printk(KERN_ERR "Packet length larger than maximum allowable; " + "need up to [%td] bytes, but there are only [%td] " + "available\n", max_packet_size, (*remaining_bytes)); rc = -EINVAL; goto out; } dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; - /* This format is inspired by OpenPGP; see RFC 2440 - * packet tag 1 */ - rc = write_packet_length(&dest[(*packet_size)], - (0x02 + ECRYPTFS_SIG_SIZE + - key_rec->enc_key_size), + rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), &packet_size_length); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " @@ -1377,13 +1445,15 @@ encrypted_session_key_set: out: if (rc) (*packet_size) = 0; + else + (*remaining_bytes) -= (*packet_size); return rc; } /** * write_tag_11_packet * @dest: Target into which Tag 11 packet is to be written - * @max: Maximum packet length + * @remaining_bytes: Maximum packet length * @contents: Byte array of contents to copy in * @contents_length: Number of bytes in contents * @packet_length: Length of the Tag 11 packet written; zero on error @@ -1391,54 +1461,59 @@ out: * Returns zero on success; non-zero on error. */ static int -write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length, - size_t *packet_length) +write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents, + size_t contents_length, size_t *packet_length) { size_t packet_size_length; + size_t max_packet_size; int rc = 0; (*packet_length) = 0; - if ((13 + contents_length) > max) { + /* This format is inspired by OpenPGP; see RFC 2440 + * packet tag 11 */ + max_packet_size = (1 /* Tag 11 identifier */ + + 3 /* Max Tag 11 packet size */ + + 1 /* Binary format specifier */ + + 1 /* Filename length */ + + 8 /* Filename ("_CONSOLE") */ + + 4 /* Modification date */ + + contents_length); /* Literal data */ + if (max_packet_size > (*remaining_bytes)) { + printk(KERN_ERR "Packet length larger than maximum allowable; " + "need up to [%td] bytes, but there are only [%td] " + "available\n", max_packet_size, (*remaining_bytes)); rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Packet length larger than " - "maximum allowable\n"); goto out; } - /* General packet header */ - /* Packet tag */ dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; - /* Packet length */ rc = write_packet_length(&dest[(*packet_length)], - (13 + contents_length), &packet_size_length); + (max_packet_size - 4), &packet_size_length); if (rc) { - ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet " - "header; cannot generate packet length\n"); + printk(KERN_ERR "Error generating tag 11 packet header; cannot " + "generate packet length. rc = [%d]\n", rc); goto out; } (*packet_length) += packet_size_length; - /* Tag 11 specific */ - /* One-octet field that describes how the data is formatted */ - dest[(*packet_length)++] = 0x62; /* binary data */ - /* One-octet filename length followed by filename */ + dest[(*packet_length)++] = 0x62; /* binary data format specifier */ dest[(*packet_length)++] = 8; memcpy(&dest[(*packet_length)], "_CONSOLE", 8); (*packet_length) += 8; - /* Four-octet number indicating modification date */ memset(&dest[(*packet_length)], 0x00, 4); (*packet_length) += 4; - /* Remainder is literal data */ memcpy(&dest[(*packet_length)], contents, contents_length); (*packet_length) += contents_length; out: if (rc) (*packet_length) = 0; + else + (*remaining_bytes) -= (*packet_length); return rc; } /** * write_tag_3_packet * @dest: Buffer into which to write the packet - * @max: Maximum number of bytes that can be written + * @remaining_bytes: Maximum number of bytes that can be written * @auth_tok: Authentication token * @crypt_stat: The cryptographic context * @key_rec: encrypted key @@ -1448,19 +1523,22 @@ write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length, * Returns zero on success; non-zero on error. */ static int -write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, +write_tag_3_packet(char *dest, size_t *remaining_bytes, + struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_key_record *key_rec, size_t *packet_size) { size_t i; size_t encrypted_session_key_valid = 0; char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; - struct scatterlist dest_sg[2]; - struct scatterlist src_sg[2]; + struct scatterlist dst_sg; + struct scatterlist src_sg; struct mutex *tfm_mutex = NULL; - size_t key_rec_size; - size_t packet_size_length; size_t cipher_code; + size_t packet_size_length; + size_t max_packet_size; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + crypt_stat->mount_crypt_stat; struct blkcipher_desc desc = { .tfm = NULL, .flags = CRYPTO_TFM_REQ_MAY_SLEEP @@ -1470,16 +1548,25 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, (*packet_size) = 0; ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature, ECRYPTFS_SIG_SIZE); - encrypted_session_key_valid = 0; - for (i = 0; i < crypt_stat->key_size; i++) - encrypted_session_key_valid |= - auth_tok->session_key.encrypted_key[i]; - if (encrypted_session_key_valid) { - memcpy(key_rec->enc_key, - auth_tok->session_key.encrypted_key, - auth_tok->session_key.encrypted_key_size); - goto encrypted_session_key_set; + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex, + crypt_stat->cipher); + if (unlikely(rc)) { + printk(KERN_ERR "Internal error whilst attempting to get " + "tfm and mutex for cipher name [%s]; rc = [%d]\n", + crypt_stat->cipher, rc); + goto out; + } + if (mount_crypt_stat->global_default_cipher_key_size == 0) { + struct blkcipher_alg *alg = crypto_blkcipher_alg(desc.tfm); + + printk(KERN_WARNING "No key size specified at mount; " + "defaulting to [%d]\n", alg->max_keysize); + mount_crypt_stat->global_default_cipher_key_size = + alg->max_keysize; } + if (crypt_stat->key_size == 0) + crypt_stat->key_size = + mount_crypt_stat->global_default_cipher_key_size; if (auth_tok->session_key.encrypted_key_size == 0) auth_tok->session_key.encrypted_key_size = crypt_stat->key_size; @@ -1487,9 +1574,24 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, && strcmp("aes", crypt_stat->cipher) == 0) { memset((crypt_stat->key + 24), 0, 8); auth_tok->session_key.encrypted_key_size = 32; - } + } else + auth_tok->session_key.encrypted_key_size = crypt_stat->key_size; key_rec->enc_key_size = auth_tok->session_key.encrypted_key_size; + encrypted_session_key_valid = 0; + for (i = 0; i < auth_tok->session_key.encrypted_key_size; i++) + encrypted_session_key_valid |= + auth_tok->session_key.encrypted_key[i]; + if (encrypted_session_key_valid) { + ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; " + "using auth_tok->session_key.encrypted_key, " + "where key_rec->enc_key_size = [%d]\n", + key_rec->enc_key_size); + memcpy(key_rec->enc_key, + auth_tok->session_key.encrypted_key, + key_rec->enc_key_size); + goto encrypted_session_key_set; + } if (auth_tok->token.password.flags & ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) { ecryptfs_printk(KERN_DEBUG, "Using previously generated " @@ -1508,54 +1610,32 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n"); ecryptfs_dump_hex(session_key_encryption_key, 16); } - rc = virt_to_scatterlist(crypt_stat->key, - key_rec->enc_key_size, src_sg, 2); - if (!rc) { + rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size, + &src_sg, 1); + if (rc != 1) { ecryptfs_printk(KERN_ERR, "Error generating scatterlist " - "for crypt_stat session key\n"); + "for crypt_stat session key; expected rc = 1; " + "got rc = [%d]. key_rec->enc_key_size = [%d]\n", + rc, key_rec->enc_key_size); rc = -ENOMEM; goto out; } - rc = virt_to_scatterlist(key_rec->enc_key, - key_rec->enc_key_size, dest_sg, 2); - if (!rc) { + rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size, + &dst_sg, 1); + if (rc != 1) { ecryptfs_printk(KERN_ERR, "Error generating scatterlist " - "for crypt_stat encrypted session key\n"); + "for crypt_stat encrypted session key; " + "expected rc = 1; got rc = [%d]. " + "key_rec->enc_key_size = [%d]\n", rc, + key_rec->enc_key_size); rc = -ENOMEM; goto out; } - if (!strcmp(crypt_stat->cipher, - crypt_stat->mount_crypt_stat->global_default_cipher_name) - && crypt_stat->mount_crypt_stat->global_key_tfm) { - desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm; - tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; - } else { - char *full_alg_name; - - rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, - crypt_stat->cipher, - "ecb"); - if (rc) - goto out; - desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0, - CRYPTO_ALG_ASYNC); - kfree(full_alg_name); - if (IS_ERR(desc.tfm)) { - rc = PTR_ERR(desc.tfm); - ecryptfs_printk(KERN_ERR, "Could not initialize crypto " - "context for cipher [%s]; rc = [%d]\n", - crypt_stat->cipher, rc); - goto out; - } - crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY); - } - if (tfm_mutex) - mutex_lock(tfm_mutex); + mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key, crypt_stat->key_size); if (rc < 0) { - if (tfm_mutex) - mutex_unlock(tfm_mutex); + mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_ERR, "Error setting key for crypto " "context; rc = [%d]\n", rc); goto out; @@ -1563,56 +1643,53 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, rc = 0; ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", crypt_stat->key_size); - rc = crypto_blkcipher_encrypt(&desc, dest_sg, src_sg, + rc = crypto_blkcipher_encrypt(&desc, &dst_sg, &src_sg, (*key_rec).enc_key_size); + mutex_unlock(tfm_mutex); if (rc) { printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc); goto out; } - if (tfm_mutex) - mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); - if (ecryptfs_verbosity > 0) + if (ecryptfs_verbosity > 0) { + ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n", + key_rec->enc_key_size); ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size); -encrypted_session_key_set: - /* Now we have a valid key_rec. Append it to the - * key_rec set. */ - key_rec_size = (sizeof(struct ecryptfs_key_record) - - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES - + (key_rec->enc_key_size)); - /* TODO: Include a packet size limit as a parameter to this - * function once we have multi-packet headers (for versions - * later than 0.1 */ - if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) { - ecryptfs_printk(KERN_ERR, "Keyset too large\n"); - rc = -EINVAL; - goto out; } - /* TODO: Packet size limit */ - /* We have 5 bytes of surrounding packet data */ - if ((0x05 + ECRYPTFS_SALT_SIZE - + key_rec->enc_key_size) >= max) { - ecryptfs_printk(KERN_ERR, "Authentication token is too " - "large\n"); +encrypted_session_key_set: + /* This format is inspired by OpenPGP; see RFC 2440 + * packet tag 3 */ + max_packet_size = (1 /* Tag 3 identifier */ + + 3 /* Max Tag 3 packet size */ + + 1 /* Version */ + + 1 /* Cipher code */ + + 1 /* S2K specifier */ + + 1 /* Hash identifier */ + + ECRYPTFS_SALT_SIZE /* Salt */ + + 1 /* Hash iterations */ + + key_rec->enc_key_size); /* Encrypted key size */ + if (max_packet_size > (*remaining_bytes)) { + printk(KERN_ERR "Packet too large; need up to [%td] bytes, but " + "there are only [%td] available\n", max_packet_size, + (*remaining_bytes)); rc = -EINVAL; goto out; } - /* This format is inspired by OpenPGP; see RFC 2440 - * packet tag 3 */ dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; - /* ver+cipher+s2k+hash+salt+iter+enc_key */ - rc = write_packet_length(&dest[(*packet_size)], - (0x05 + ECRYPTFS_SALT_SIZE - + key_rec->enc_key_size), + /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) + * to get the number of octets in the actual Tag 3 packet */ + rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), &packet_size_length); if (rc) { - ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet " - "header; cannot generate packet length\n"); + printk(KERN_ERR "Error generating tag 3 packet header; cannot " + "generate packet length. rc = [%d]\n", rc); goto out; } (*packet_size) += packet_size_length; dest[(*packet_size)++] = 0x04; /* version 4 */ + /* TODO: Break from RFC2440 so that arbitrary ciphers can be + * specified with strings */ cipher_code = ecryptfs_code_for_cipher_string(crypt_stat); if (cipher_code == 0) { ecryptfs_printk(KERN_WARNING, "Unable to generate code for " @@ -1631,10 +1708,10 @@ encrypted_session_key_set: key_rec->enc_key_size); (*packet_size) += key_rec->enc_key_size; out: - if (desc.tfm && !tfm_mutex) - crypto_free_blkcipher(desc.tfm); if (rc) (*packet_size) = 0; + else + (*remaining_bytes) -= (*packet_size); return rc; } @@ -1642,7 +1719,7 @@ struct kmem_cache *ecryptfs_key_record_cache; /** * ecryptfs_generate_key_packet_set - * @dest: Virtual address from which to write the key record set + * @dest_base: Virtual address from which to write the key record set * @crypt_stat: The cryptographic context from which the * authentication tokens will be retrieved * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat @@ -1662,24 +1739,43 @@ ecryptfs_generate_key_packet_set(char *dest_base, size_t max) { struct ecryptfs_auth_tok *auth_tok; + struct ecryptfs_global_auth_tok *global_auth_tok; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; size_t written; struct ecryptfs_key_record *key_rec; + struct ecryptfs_key_sig *key_sig; int rc = 0; (*len) = 0; + mutex_lock(&crypt_stat->keysig_list_mutex); key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL); if (!key_rec) { rc = -ENOMEM; goto out; } - if (mount_crypt_stat->global_auth_tok) { - auth_tok = mount_crypt_stat->global_auth_tok; + list_for_each_entry(key_sig, &crypt_stat->keysig_list, + crypt_stat_list) { + memset(key_rec, 0, sizeof(*key_rec)); + rc = ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok, + mount_crypt_stat, + key_sig->keysig); + if (rc) { + printk(KERN_ERR "Error attempting to get the global " + "auth_tok; rc = [%d]\n", rc); + goto out_free; + } + if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID) { + printk(KERN_WARNING + "Skipping invalid auth tok with sig = [%s]\n", + global_auth_tok->sig); + continue; + } + auth_tok = global_auth_tok->global_auth_tok; if (auth_tok->token_type == ECRYPTFS_PASSWORD) { rc = write_tag_3_packet((dest_base + (*len)), - max, auth_tok, + &max, auth_tok, crypt_stat, key_rec, &written); if (rc) { @@ -1689,10 +1785,9 @@ ecryptfs_generate_key_packet_set(char *dest_base, } (*len) += written; /* Write auth tok signature packet */ - rc = write_tag_11_packet( - (dest_base + (*len)), - (max - (*len)), - key_rec->sig, ECRYPTFS_SIG_SIZE, &written); + rc = write_tag_11_packet((dest_base + (*len)), &max, + key_rec->sig, + ECRYPTFS_SIG_SIZE, &written); if (rc) { ecryptfs_printk(KERN_ERR, "Error writing " "auth tok signature packet\n"); @@ -1701,9 +1796,8 @@ ecryptfs_generate_key_packet_set(char *dest_base, (*len) += written; } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) { rc = write_tag_1_packet(dest_base + (*len), - max, auth_tok, - crypt_stat,mount_crypt_stat, - key_rec, &written); + &max, auth_tok, + crypt_stat, key_rec, &written); if (rc) { ecryptfs_printk(KERN_WARNING, "Error " "writing tag 1 packet\n"); @@ -1716,19 +1810,69 @@ ecryptfs_generate_key_packet_set(char *dest_base, rc = -EINVAL; goto out_free; } - } else - BUG(); - if (likely((max - (*len)) > 0)) { + } + if (likely(max > 0)) { dest_base[(*len)] = 0x00; } else { ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n"); rc = -EIO; } - out_free: kmem_cache_free(ecryptfs_key_record_cache, key_rec); out: if (rc) (*len) = 0; + mutex_unlock(&crypt_stat->keysig_list_mutex); return rc; } + +struct kmem_cache *ecryptfs_key_sig_cache; + +int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig) +{ + struct ecryptfs_key_sig *new_key_sig; + int rc = 0; + + new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL); + if (!new_key_sig) { + rc = -ENOMEM; + printk(KERN_ERR + "Error allocating from ecryptfs_key_sig_cache\n"); + goto out; + } + memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX); + mutex_lock(&crypt_stat->keysig_list_mutex); + list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list); + mutex_unlock(&crypt_stat->keysig_list_mutex); +out: + return rc; +} + +struct kmem_cache *ecryptfs_global_auth_tok_cache; + +int +ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, + char *sig) +{ + struct ecryptfs_global_auth_tok *new_auth_tok; + int rc = 0; + + new_auth_tok = kmem_cache_alloc(ecryptfs_global_auth_tok_cache, + GFP_KERNEL); + if (!new_auth_tok) { + rc = -ENOMEM; + printk(KERN_ERR "Error allocating from " + "ecryptfs_global_auth_tok_cache\n"); + goto out; + } + memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX); + new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_add(&new_auth_tok->mount_crypt_stat_list, + &mount_crypt_stat->global_auth_tok_list); + mount_crypt_stat->num_global_auth_toks++; + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); +out: + return rc; +} + diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a98497264fe..97e6801f722 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -99,6 +99,64 @@ void __ecryptfs_printk(const char *fmt, ...) } /** + * ecryptfs_init_persistent_file + * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with + * the lower dentry and the lower mount set + * + * eCryptfs only ever keeps a single open file for every lower + * inode. All I/O operations to the lower inode occur through that + * file. When the first eCryptfs dentry that interposes with the first + * lower dentry for that inode is created, this function creates the + * persistent file struct and associates it with the eCryptfs + * inode. When the eCryptfs inode is destroyed, the file is closed. + * + * The persistent file will be opened with read/write permissions, if + * possible. Otherwise, it is opened read-only. + * + * This function does nothing if a lower persistent file is already + * associated with the eCryptfs inode. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) +{ + struct ecryptfs_inode_info *inode_info = + ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); + int rc = 0; + + mutex_lock(&inode_info->lower_file_mutex); + if (!inode_info->lower_file) { + struct dentry *lower_dentry; + struct vfsmount *lower_mnt = + ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); + + lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + /* Corresponding dput() and mntput() are done when the + * persistent file is fput() when the eCryptfs inode + * is destroyed. */ + dget(lower_dentry); + mntget(lower_mnt); + inode_info->lower_file = dentry_open(lower_dentry, + lower_mnt, + (O_RDWR | O_LARGEFILE)); + if (IS_ERR(inode_info->lower_file)) + inode_info->lower_file = dentry_open(lower_dentry, + lower_mnt, + (O_RDONLY + | O_LARGEFILE)); + if (IS_ERR(inode_info->lower_file)) { + printk(KERN_ERR "Error opening lower persistent file " + "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", + lower_dentry, lower_mnt); + rc = PTR_ERR(inode_info->lower_file); + inode_info->lower_file = NULL; + } + } + mutex_unlock(&inode_info->lower_file_mutex); + return rc; +} + +/** * ecryptfs_interpose * @lower_dentry: Existing dentry in the lower filesystem * @dentry: ecryptfs' dentry @@ -155,6 +213,13 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, /* This size will be overwritten for real files w/ headers and * other metadata */ fsstack_copy_inode_size(inode, lower_inode); + rc = ecryptfs_init_persistent_file(dentry); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize the " + "persistent file for the dentry with name [%s]; " + "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); + goto out; + } out: return rc; } @@ -179,38 +244,41 @@ static match_table_t tokens = { {ecryptfs_opt_err, NULL} }; -/** - * ecryptfs_verify_version - * @version: The version number to confirm - * - * Returns zero on good version; non-zero otherwise - */ -static int ecryptfs_verify_version(u16 version) +static int ecryptfs_init_global_auth_toks( + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) { + struct ecryptfs_global_auth_tok *global_auth_tok; int rc = 0; - unsigned char major; - unsigned char minor; - - major = ((version >> 8) & 0xFF); - minor = (version & 0xFF); - if (major != ECRYPTFS_VERSION_MAJOR) { - ecryptfs_printk(KERN_ERR, "Major version number mismatch. " - "Expected [%d]; got [%d]\n", - ECRYPTFS_VERSION_MAJOR, major); - rc = -EINVAL; - goto out; - } - if (minor != ECRYPTFS_VERSION_MINOR) { - ecryptfs_printk(KERN_ERR, "Minor version number mismatch. " - "Expected [%d]; got [%d]\n", - ECRYPTFS_VERSION_MINOR, minor); - rc = -EINVAL; - goto out; + + list_for_each_entry(global_auth_tok, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + rc = ecryptfs_keyring_auth_tok_for_sig( + &global_auth_tok->global_auth_tok_key, + &global_auth_tok->global_auth_tok, + global_auth_tok->sig); + if (rc) { + printk(KERN_ERR "Could not find valid key in user " + "session keyring for sig specified in mount " + "option: [%s]\n", global_auth_tok->sig); + global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; + rc = 0; + } else + global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; } -out: return rc; } +static void ecryptfs_init_mount_crypt_stat( + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + memset((void *)mount_crypt_stat, 0, + sizeof(struct ecryptfs_mount_crypt_stat)); + INIT_LIST_HEAD(&mount_crypt_stat->global_auth_tok_list); + mutex_init(&mount_crypt_stat->global_auth_tok_list_mutex); + mount_crypt_stat->flags |= ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED; +} + /** * ecryptfs_parse_options * @sb: The ecryptfs super block @@ -238,14 +306,11 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) int cipher_name_set = 0; int cipher_key_bytes; int cipher_key_bytes_set = 0; - struct key *auth_tok_key = NULL; - struct ecryptfs_auth_tok *auth_tok = NULL; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; substring_t args[MAX_OPT_ARGS]; int token; char *sig_src; - char *sig_dst; char *debug_src; char *cipher_name_dst; char *cipher_name_src; @@ -256,6 +321,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) rc = -EINVAL; goto out; } + ecryptfs_init_mount_crypt_stat(mount_crypt_stat); while ((p = strsep(&options, ",")) != NULL) { if (!*p) continue; @@ -264,14 +330,13 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) case ecryptfs_opt_sig: case ecryptfs_opt_ecryptfs_sig: sig_src = args[0].from; - sig_dst = - mount_crypt_stat->global_auth_tok_sig; - memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX); - sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0'; - ecryptfs_printk(KERN_DEBUG, - "The mount_crypt_stat " - "global_auth_tok_sig set to: " - "[%s]\n", sig_dst); + rc = ecryptfs_add_global_auth_tok(mount_crypt_stat, + sig_src); + if (rc) { + printk(KERN_ERR "Error attempting to register " + "global sig; rc = [%d]\n", rc); + goto out; + } sig_set = 1; break; case ecryptfs_opt_debug: @@ -333,12 +398,10 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) p); } } - /* Do not support lack of mount-wide signature in 0.1 - * release */ if (!sig_set) { rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "You must supply a valid " - "passphrase auth tok signature as a mount " + ecryptfs_printk(KERN_ERR, "You must supply at least one valid " + "auth tok signature as a mount " "parameter; see the eCryptfs README\n"); goto out; } @@ -358,55 +421,23 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) if (!cipher_key_bytes_set) { mount_crypt_stat->global_default_cipher_key_size = 0; } - rc = ecryptfs_process_cipher( - &mount_crypt_stat->global_key_tfm, - mount_crypt_stat->global_default_cipher_name, - &mount_crypt_stat->global_default_cipher_key_size); + rc = ecryptfs_add_new_key_tfm( + NULL, mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); if (rc) { - printk(KERN_ERR "Error attempting to initialize cipher [%s] " - "with key size [%Zd] bytes; rc = [%d]\n", + printk(KERN_ERR "Error attempting to initialize cipher with " + "name = [%s] and key size = [%td]; rc = [%d]\n", mount_crypt_stat->global_default_cipher_name, mount_crypt_stat->global_default_cipher_key_size, rc); - mount_crypt_stat->global_key_tfm = NULL; - mount_crypt_stat->global_auth_tok_key = NULL; rc = -EINVAL; goto out; } - mutex_init(&mount_crypt_stat->global_key_tfm_mutex); - ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: " - "[%s]\n", mount_crypt_stat->global_auth_tok_sig); - /* The reference to this key is held until umount is done The - * call to key_put is done in ecryptfs_put_super() */ - auth_tok_key = request_key(&key_type_user, - mount_crypt_stat->global_auth_tok_sig, - NULL); - if (!auth_tok_key || IS_ERR(auth_tok_key)) { - ecryptfs_printk(KERN_ERR, "Could not find key with " - "description: [%s]\n", - mount_crypt_stat->global_auth_tok_sig); - process_request_key_err(PTR_ERR(auth_tok_key)); - rc = -EINVAL; - goto out; - } - auth_tok = ecryptfs_get_key_payload_data(auth_tok_key); - if (ecryptfs_verify_version(auth_tok->version)) { - ecryptfs_printk(KERN_ERR, "Data structure version mismatch. " - "Userspace tools must match eCryptfs kernel " - "module with major version [%d] and minor " - "version [%d]\n", ECRYPTFS_VERSION_MAJOR, - ECRYPTFS_VERSION_MINOR); - rc = -EINVAL; - goto out; - } - if (auth_tok->token_type != ECRYPTFS_PASSWORD - && auth_tok->token_type != ECRYPTFS_PRIVATE_KEY) { - ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure " - "returned from key query\n"); - rc = -EINVAL; - goto out; + rc = ecryptfs_init_global_auth_toks(mount_crypt_stat); + if (rc) { + printk(KERN_WARNING "One or more global auth toks could not " + "properly register; rc = [%d]\n", rc); } - mount_crypt_stat->global_auth_tok_key = auth_tok_key; - mount_crypt_stat->global_auth_tok = auth_tok; + rc = 0; out: return rc; } @@ -495,7 +526,8 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) sb->s_maxbytes = lower_root->d_sb->s_maxbytes; ecryptfs_set_dentry_lower(sb->s_root, lower_root); ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); - if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0))) + rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0); + if (rc) goto out_free; rc = 0; goto out; @@ -639,15 +671,25 @@ static struct ecryptfs_cache_info { .size = PAGE_CACHE_SIZE, }, { - .cache = &ecryptfs_lower_page_cache, - .name = "ecryptfs_lower_page_cache", - .size = PAGE_CACHE_SIZE, - }, - { .cache = &ecryptfs_key_record_cache, .name = "ecryptfs_key_record_cache", .size = sizeof(struct ecryptfs_key_record), }, + { + .cache = &ecryptfs_key_sig_cache, + .name = "ecryptfs_key_sig_cache", + .size = sizeof(struct ecryptfs_key_sig), + }, + { + .cache = &ecryptfs_global_auth_tok_cache, + .name = "ecryptfs_global_auth_tok_cache", + .size = sizeof(struct ecryptfs_global_auth_tok), + }, + { + .cache = &ecryptfs_key_tfm_cache, + .name = "ecryptfs_key_tfm_cache", + .size = sizeof(struct ecryptfs_key_tfm), + }, }; static void ecryptfs_free_kmem_caches(void) @@ -750,7 +792,8 @@ static struct ecryptfs_version_str_map_elem { {ECRYPTFS_VERSIONING_PUBKEY, "pubkey"}, {ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"}, {ECRYPTFS_VERSIONING_POLICY, "policy"}, - {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"} + {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"}, + {ECRYPTFS_VERSIONING_MULTKEY, "multiple keys per file"} }; static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff) @@ -786,7 +829,8 @@ static int do_sysfs_registration(void) { int rc; - if ((rc = subsystem_register(&ecryptfs_subsys))) { + rc = subsystem_register(&ecryptfs_subsys); + if (rc) { printk(KERN_ERR "Unable to register ecryptfs sysfs subsystem\n"); goto out; @@ -845,33 +889,49 @@ static int __init ecryptfs_init(void) rc = register_filesystem(&ecryptfs_fs_type); if (rc) { printk(KERN_ERR "Failed to register filesystem\n"); - ecryptfs_free_kmem_caches(); - goto out; + goto out_free_kmem_caches; } kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); rc = do_sysfs_registration(); if (rc) { printk(KERN_ERR "sysfs registration failed\n"); - unregister_filesystem(&ecryptfs_fs_type); - ecryptfs_free_kmem_caches(); - goto out; + goto out_unregister_filesystem; } rc = ecryptfs_init_messaging(ecryptfs_transport); if (rc) { ecryptfs_printk(KERN_ERR, "Failure occured while attempting to " "initialize the eCryptfs netlink socket\n"); - do_sysfs_unregistration(); - unregister_filesystem(&ecryptfs_fs_type); - ecryptfs_free_kmem_caches(); + goto out_do_sysfs_unregistration; + } + rc = ecryptfs_init_crypto(); + if (rc) { + printk(KERN_ERR "Failure whilst attempting to init crypto; " + "rc = [%d]\n", rc); + goto out_release_messaging; } + goto out; +out_release_messaging: + ecryptfs_release_messaging(ecryptfs_transport); +out_do_sysfs_unregistration: + do_sysfs_unregistration(); +out_unregister_filesystem: + unregister_filesystem(&ecryptfs_fs_type); +out_free_kmem_caches: + ecryptfs_free_kmem_caches(); out: return rc; } static void __exit ecryptfs_exit(void) { - do_sysfs_unregistration(); + int rc; + + rc = ecryptfs_destroy_crypto(); + if (rc) + printk(KERN_ERR "Failure whilst attempting to destroy crypto; " + "rc = [%d]\n", rc); ecryptfs_release_messaging(ecryptfs_transport); + do_sysfs_unregistration(); unregister_filesystem(&ecryptfs_fs_type); ecryptfs_free_kmem_caches(); } diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index a9d87c47f72..a96d341d154 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -419,8 +419,9 @@ int ecryptfs_init_messaging(unsigned int transport) } mutex_init(&ecryptfs_daemon_id_hash_mux); mutex_lock(&ecryptfs_daemon_id_hash_mux); - ecryptfs_hash_buckets = 0; - while (ecryptfs_number_of_users >> ++ecryptfs_hash_buckets); + ecryptfs_hash_buckets = 1; + while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) + ecryptfs_hash_buckets++; ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) * ecryptfs_hash_buckets, GFP_KERNEL); if (!ecryptfs_daemon_id_hash) { diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index fd3f94d4a66..16a7a555f39 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -37,130 +37,27 @@ struct kmem_cache *ecryptfs_lower_page_cache; /** - * ecryptfs_get1page + * ecryptfs_get_locked_page * * Get one page from cache or lower f/s, return error otherwise. * - * Returns unlocked and up-to-date page (if ok), with increased + * Returns locked and up-to-date page (if ok), with increased * refcnt. */ -static struct page *ecryptfs_get1page(struct file *file, int index) +struct page *ecryptfs_get_locked_page(struct file *file, loff_t index) { struct dentry *dentry; struct inode *inode; struct address_space *mapping; + struct page *page; dentry = file->f_path.dentry; inode = dentry->d_inode; mapping = inode->i_mapping; - return read_mapping_page(mapping, index, (void *)file); -} - -/** - * ecryptfs_fill_zeros - * @file: The ecryptfs file - * @new_length: The new length of the data in the underlying file; - * everything between the prior end of the file and the - * new end of the file will be filled with zero's. - * new_length must be greater than current length - * - * Function for handling lseek-ing past the end of the file. - * - * This function does not support shrinking, only growing a file. - * - * Returns zero on success; non-zero otherwise. - */ -int ecryptfs_fill_zeros(struct file *file, loff_t new_length) -{ - int rc = 0; - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - pgoff_t old_end_page_index = 0; - pgoff_t index = old_end_page_index; - int old_end_pos_in_page = -1; - pgoff_t new_end_page_index; - int new_end_pos_in_page; - loff_t cur_length = i_size_read(inode); - - if (cur_length != 0) { - index = old_end_page_index = - ((cur_length - 1) >> PAGE_CACHE_SHIFT); - old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK); - } - new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT); - new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK); - ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; " - "old_end_pos_in_page = [%d]; " - "new_end_page_index = [0x%.16x]; " - "new_end_pos_in_page = [%d]\n", - old_end_page_index, old_end_pos_in_page, - new_end_page_index, new_end_pos_in_page); - if (old_end_page_index == new_end_page_index) { - /* Start and end are in the same page; we just need to - * set a portion of the existing page to zero's */ - rc = ecryptfs_write_zeros(file, index, - (old_end_pos_in_page + 1), - (new_end_pos_in_page - - old_end_pos_in_page)); - if (rc) - ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(" - "file=[%p], " - "index=[0x%.16x], " - "old_end_pos_in_page=[d], " - "(PAGE_CACHE_SIZE - new_end_pos_in_page" - "=[%d]" - ")=[d]) returned [%d]\n", file, index, - old_end_pos_in_page, - new_end_pos_in_page, - (PAGE_CACHE_SIZE - new_end_pos_in_page), - rc); - goto out; - } - /* Fill the remainder of the previous last page with zeros */ - rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1), - ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page)); - if (rc) { - ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], " - "index=[0x%.16x], old_end_pos_in_page=[d], " - "(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) " - "returned [%d]\n", file, index, - old_end_pos_in_page, - (PAGE_CACHE_SIZE - old_end_pos_in_page), rc); - goto out; - } - index++; - while (index < new_end_page_index) { - /* Fill all intermediate pages with zeros */ - rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE); - if (rc) { - ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(" - "file=[%p], " - "index=[0x%.16x], " - "old_end_pos_in_page=[d], " - "(PAGE_CACHE_SIZE - new_end_pos_in_page" - "=[%d]" - ")=[d]) returned [%d]\n", file, index, - old_end_pos_in_page, - new_end_pos_in_page, - (PAGE_CACHE_SIZE - new_end_pos_in_page), - rc); - goto out; - } - index++; - } - /* Fill the portion at the beginning of the last new page with - * zero's */ - rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1)); - if (rc) { - ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=" - "[%p], index=[0x%.16x], 0, " - "new_end_pos_in_page=[%d]" - "returned [%d]\n", file, index, - new_end_pos_in_page, rc); - goto out; - } -out: - return rc; + page = read_mapping_page(mapping, index, (void *)file); + if (!IS_ERR(page)) + lock_page(page); + return page; } /** @@ -171,13 +68,9 @@ out: */ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) { - struct ecryptfs_page_crypt_context ctx; int rc; - ctx.page = page; - ctx.mode = ECRYPTFS_WRITEPAGE_MODE; - ctx.param.wbc = wbc; - rc = ecryptfs_encrypt_page(&ctx); + rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting " "page (upper index [0x%.16x])\n", page->index); @@ -191,58 +84,6 @@ out: } /** - * Reads the data from the lower file file at index lower_page_index - * and copies that data into page. - * - * @param page Page to fill - * @param lower_page_index Index of the page in the lower file to get - */ -int ecryptfs_do_readpage(struct file *file, struct page *page, - pgoff_t lower_page_index) -{ - int rc; - struct dentry *dentry; - struct file *lower_file; - struct dentry *lower_dentry; - struct inode *inode; - struct inode *lower_inode; - char *page_data; - struct page *lower_page = NULL; - char *lower_page_data; - const struct address_space_operations *lower_a_ops; - - dentry = file->f_path.dentry; - lower_file = ecryptfs_file_to_lower(file); - lower_dentry = ecryptfs_dentry_to_lower(dentry); - inode = dentry->d_inode; - lower_inode = ecryptfs_inode_to_lower(inode); - lower_a_ops = lower_inode->i_mapping->a_ops; - lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index, - (filler_t *)lower_a_ops->readpage, - (void *)lower_file); - if (IS_ERR(lower_page)) { - rc = PTR_ERR(lower_page); - lower_page = NULL; - ecryptfs_printk(KERN_ERR, "Error reading from page cache\n"); - goto out; - } - page_data = kmap_atomic(page, KM_USER0); - lower_page_data = kmap_atomic(lower_page, KM_USER1); - memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); - kunmap_atomic(lower_page_data, KM_USER1); - kunmap_atomic(page_data, KM_USER0); - flush_dcache_page(page); - rc = 0; -out: - if (likely(lower_page)) - page_cache_release(lower_page); - if (rc == 0) - SetPageUptodate(page); - else - ClearPageUptodate(page); - return rc; -} -/** * Header Extent: * Octets 0-7: Unencrypted file size (big-endian) * Octets 8-15: eCryptfs special marker @@ -271,9 +112,77 @@ static void set_header_info(char *page_virt, } /** + * ecryptfs_copy_up_encrypted_with_header + * @page: Sort of a ``virtual'' representation of the encrypted lower + * file. The actual lower file does not have the metadata in + * the header. This is locked. + * @crypt_stat: The eCryptfs inode's cryptographic context + * + * The ``view'' is the version of the file that userspace winds up + * seeing, with the header information inserted. + */ +static int +ecryptfs_copy_up_encrypted_with_header(struct page *page, + struct ecryptfs_crypt_stat *crypt_stat) +{ + loff_t extent_num_in_page = 0; + loff_t num_extents_per_page = (PAGE_CACHE_SIZE + / crypt_stat->extent_size); + int rc = 0; + + while (extent_num_in_page < num_extents_per_page) { + loff_t view_extent_num = ((((loff_t)page->index) + * num_extents_per_page) + + extent_num_in_page); + + if (view_extent_num < crypt_stat->num_header_extents_at_front) { + /* This is a header extent */ + char *page_virt; + + page_virt = kmap_atomic(page, KM_USER0); + memset(page_virt, 0, PAGE_CACHE_SIZE); + /* TODO: Support more than one header extent */ + if (view_extent_num == 0) { + rc = ecryptfs_read_xattr_region( + page_virt, page->mapping->host); + set_header_info(page_virt, crypt_stat); + } + kunmap_atomic(page_virt, KM_USER0); + flush_dcache_page(page); + if (rc) { + printk(KERN_ERR "%s: Error reading xattr " + "region; rc = [%d]\n", __FUNCTION__, rc); + goto out; + } + } else { + /* This is an encrypted data extent */ + loff_t lower_offset = + ((view_extent_num - + crypt_stat->num_header_extents_at_front) + * crypt_stat->extent_size); + + rc = ecryptfs_read_lower_page_segment( + page, (lower_offset >> PAGE_CACHE_SHIFT), + (lower_offset & ~PAGE_CACHE_MASK), + crypt_stat->extent_size, page->mapping->host); + if (rc) { + printk(KERN_ERR "%s: Error attempting to read " + "extent at offset [%lld] in the lower " + "file; rc = [%d]\n", __FUNCTION__, + lower_offset, rc); + goto out; + } + } + extent_num_in_page++; + } +out: + return rc; +} + +/** * ecryptfs_readpage - * @file: This is an ecryptfs file - * @page: ecryptfs associated page to stick the read data into + * @file: An eCryptfs file + * @page: Page from eCryptfs inode mapping into which to stick the read data * * Read in a page, decrypting if necessary. * @@ -281,59 +190,35 @@ static void set_header_info(char *page_virt, */ static int ecryptfs_readpage(struct file *file, struct page *page) { + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; int rc = 0; - struct ecryptfs_crypt_stat *crypt_stat; - BUG_ON(!(file && file->f_path.dentry && file->f_path.dentry->d_inode)); - crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode) - ->crypt_stat; if (!crypt_stat || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED) || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { ecryptfs_printk(KERN_DEBUG, "Passing through unencrypted page\n"); - rc = ecryptfs_do_readpage(file, page, page->index); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error reading page; rc = " - "[%d]\n", rc); - goto out; - } + rc = ecryptfs_read_lower_page_segment(page, page->index, 0, + PAGE_CACHE_SIZE, + page->mapping->host); } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { - int num_pages_in_header_region = - (crypt_stat->header_extent_size - / PAGE_CACHE_SIZE); - - if (page->index < num_pages_in_header_region) { - char *page_virt; - - page_virt = kmap_atomic(page, KM_USER0); - memset(page_virt, 0, PAGE_CACHE_SIZE); - if (page->index == 0) { - rc = ecryptfs_read_xattr_region( - page_virt, file->f_path.dentry); - set_header_info(page_virt, crypt_stat); - } - kunmap_atomic(page_virt, KM_USER0); - flush_dcache_page(page); - if (rc) { - printk(KERN_ERR "Error reading xattr " - "region\n"); - goto out; - } - } else { - rc = ecryptfs_do_readpage( - file, page, - (page->index - - num_pages_in_header_region)); - if (rc) { - printk(KERN_ERR "Error reading page; " - "rc = [%d]\n", rc); - goto out; - } + rc = ecryptfs_copy_up_encrypted_with_header(page, + crypt_stat); + if (rc) { + printk(KERN_ERR "%s: Error attempting to copy " + "the encrypted content from the lower " + "file whilst inserting the metadata " + "from the xattr into the header; rc = " + "[%d]\n", __FUNCTION__, rc); + goto out; } + } else { - rc = ecryptfs_do_readpage(file, page, page->index); + rc = ecryptfs_read_lower_page_segment( + page, page->index, 0, PAGE_CACHE_SIZE, + page->mapping->host); if (rc) { printk(KERN_ERR "Error reading page; rc = " "[%d]\n", rc); @@ -341,17 +226,18 @@ static int ecryptfs_readpage(struct file *file, struct page *page) } } } else { - rc = ecryptfs_decrypt_page(file, page); + rc = ecryptfs_decrypt_page(page); if (rc) { ecryptfs_printk(KERN_ERR, "Error decrypting page; " "rc = [%d]\n", rc); goto out; } } - SetPageUptodate(page); out: if (rc) ClearPageUptodate(page); + else + SetPageUptodate(page); ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", page->index); unlock_page(page); @@ -377,27 +263,6 @@ out: return 0; } -/** - * eCryptfs does not currently support holes. When writing after a - * seek past the end of the file, eCryptfs fills in 0's through to the - * current location. The code to fill in the 0's to all the - * intermediate pages calls ecryptfs_prepare_write_no_truncate(). - */ -static int -ecryptfs_prepare_write_no_truncate(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - int rc = 0; - - if (from == 0 && to == PAGE_CACHE_SIZE) - goto out; /* If we are writing a full page, it will be - up to date. */ - if (!PageUptodate(page)) - rc = ecryptfs_do_readpage(file, page, page->index); -out: - return rc; -} - static int ecryptfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -406,10 +271,21 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (from == 0 && to == PAGE_CACHE_SIZE) goto out; /* If we are writing a full page, it will be up to date. */ - if (!PageUptodate(page)) - rc = ecryptfs_do_readpage(file, page, page->index); + if (!PageUptodate(page)) { + rc = ecryptfs_read_lower_page_segment(page, page->index, 0, + PAGE_CACHE_SIZE, + page->mapping->host); + if (rc) { + printk(KERN_ERR "%s: Error attemping to read lower " + "page segment; rc = [%d]\n", __FUNCTION__, rc); + ClearPageUptodate(page); + goto out; + } else + SetPageUptodate(page); + } if (page->index != 0) { - loff_t end_of_prev_pg_pos = page_offset(page) - 1; + loff_t end_of_prev_pg_pos = + (((loff_t)page->index << PAGE_CACHE_SHIFT) - 1); if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) { rc = ecryptfs_truncate(file->f_path.dentry, @@ -428,32 +304,6 @@ out: return rc; } -int ecryptfs_writepage_and_release_lower_page(struct page *lower_page, - struct inode *lower_inode, - struct writeback_control *wbc) -{ - int rc = 0; - - rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); " - "rc = [%d]\n", rc); - goto out; - } - lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; - page_cache_release(lower_page); -out: - return rc; -} - -static -void ecryptfs_release_lower_page(struct page *lower_page, int page_locked) -{ - if (page_locked) - unlock_page(lower_page); - page_cache_release(lower_page); -} - /** * ecryptfs_write_inode_size_to_header * @@ -461,67 +311,48 @@ void ecryptfs_release_lower_page(struct page *lower_page, int page_locked) * * Returns zero on success; non-zero on error. */ -static int ecryptfs_write_inode_size_to_header(struct file *lower_file, - struct inode *lower_inode, - struct inode *inode) +static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) { - int rc = 0; - struct page *header_page; - char *header_virt; - const struct address_space_operations *lower_a_ops; u64 file_size; + char *file_size_virt; + int rc; -retry: - header_page = grab_cache_page(lower_inode->i_mapping, 0); - if (!header_page) { - ecryptfs_printk(KERN_ERR, "grab_cache_page for " - "lower_page_index 0 failed\n"); - rc = -EINVAL; - goto out; - } - lower_a_ops = lower_inode->i_mapping->a_ops; - rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8); - if (rc) { - if (rc == AOP_TRUNCATED_PAGE) { - ecryptfs_release_lower_page(header_page, 0); - goto retry; - } else - ecryptfs_release_lower_page(header_page, 1); + file_size_virt = kmalloc(sizeof(u64), GFP_KERNEL); + if (!file_size_virt) { + rc = -ENOMEM; goto out; } - file_size = (u64)i_size_read(inode); - ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size); + file_size = (u64)i_size_read(ecryptfs_inode); file_size = cpu_to_be64(file_size); - header_virt = kmap_atomic(header_page, KM_USER0); - memcpy(header_virt, &file_size, sizeof(u64)); - kunmap_atomic(header_virt, KM_USER0); - flush_dcache_page(header_page); - rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); - if (rc < 0) - ecryptfs_printk(KERN_ERR, "Error commiting header page " - "write\n"); - if (rc == AOP_TRUNCATED_PAGE) { - ecryptfs_release_lower_page(header_page, 0); - goto retry; - } else - ecryptfs_release_lower_page(header_page, 1); - lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(inode); + memcpy(file_size_virt, &file_size, sizeof(u64)); + rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, + sizeof(u64)); + kfree(file_size_virt); + if (rc) + printk(KERN_ERR "%s: Error writing file size to header; " + "rc = [%d]\n", __FUNCTION__, rc); out: return rc; } -static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode, - struct inode *inode, - struct dentry *ecryptfs_dentry, - int lower_i_mutex_held) +struct kmem_cache *ecryptfs_xattr_cache; + +static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) { ssize_t size; void *xattr_virt; - struct dentry *lower_dentry; + struct dentry *lower_dentry = + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; + struct inode *lower_inode = lower_dentry->d_inode; u64 file_size; int rc; + if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) { + printk(KERN_WARNING + "No support for setting xattr in lower filesystem\n"); + rc = -ENOSYS; + goto out; + } xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL); if (!xattr_virt) { printk(KERN_ERR "Out of memory whilst attempting to write " @@ -529,35 +360,17 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode, rc = -ENOMEM; goto out; } - lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); - if (!lower_dentry->d_inode->i_op->getxattr || - !lower_dentry->d_inode->i_op->setxattr) { - printk(KERN_WARNING - "No support for setting xattr in lower filesystem\n"); - rc = -ENOSYS; - kmem_cache_free(ecryptfs_xattr_cache, xattr_virt); - goto out; - } - if (!lower_i_mutex_held) - mutex_lock(&lower_dentry->d_inode->i_mutex); - size = lower_dentry->d_inode->i_op->getxattr(lower_dentry, - ECRYPTFS_XATTR_NAME, - xattr_virt, - PAGE_CACHE_SIZE); - if (!lower_i_mutex_held) - mutex_unlock(&lower_dentry->d_inode->i_mutex); + mutex_lock(&lower_inode->i_mutex); + size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME, + xattr_virt, PAGE_CACHE_SIZE); if (size < 0) size = 8; - file_size = (u64)i_size_read(inode); + file_size = (u64)i_size_read(ecryptfs_inode); file_size = cpu_to_be64(file_size); memcpy(xattr_virt, &file_size, sizeof(u64)); - if (!lower_i_mutex_held) - mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, - ECRYPTFS_XATTR_NAME, - xattr_virt, size, 0); - if (!lower_i_mutex_held) - mutex_unlock(&lower_dentry->d_inode->i_mutex); + rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, + xattr_virt, size, 0); + mutex_unlock(&lower_inode->i_mutex); if (rc) printk(KERN_ERR "Error whilst attempting to write inode size " "to lower file xattr; rc = [%d]\n", rc); @@ -566,122 +379,18 @@ out: return rc; } -int -ecryptfs_write_inode_size_to_metadata(struct file *lower_file, - struct inode *lower_inode, - struct inode *inode, - struct dentry *ecryptfs_dentry, - int lower_i_mutex_held) +int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat; - crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - return ecryptfs_write_inode_size_to_xattr(lower_inode, inode, - ecryptfs_dentry, - lower_i_mutex_held); + return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); else - return ecryptfs_write_inode_size_to_header(lower_file, - lower_inode, - inode); -} - -int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, - struct file *lower_file, - unsigned long lower_page_index, int byte_offset, - int region_bytes) -{ - int rc = 0; - -retry: - *lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index); - if (!(*lower_page)) { - rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Error attempting to grab " - "lower page with index [0x%.16x]\n", - lower_page_index); - goto out; - } - rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file, - (*lower_page), - byte_offset, - region_bytes); - if (rc) { - if (rc == AOP_TRUNCATED_PAGE) { - ecryptfs_release_lower_page(*lower_page, 0); - goto retry; - } else { - ecryptfs_printk(KERN_ERR, "prepare_write for " - "lower_page_index = [0x%.16x] failed; rc = " - "[%d]\n", lower_page_index, rc); - ecryptfs_release_lower_page(*lower_page, 1); - (*lower_page) = NULL; - } - } -out: - return rc; -} - -/** - * ecryptfs_commit_lower_page - * - * Returns zero on success; non-zero on error - */ -int -ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, - struct file *lower_file, int byte_offset, - int region_size) -{ - int page_locked = 1; - int rc = 0; - - rc = lower_inode->i_mapping->a_ops->commit_write( - lower_file, lower_page, byte_offset, region_size); - if (rc == AOP_TRUNCATED_PAGE) - page_locked = 0; - if (rc < 0) { - ecryptfs_printk(KERN_ERR, - "Error committing write; rc = [%d]\n", rc); - } else - rc = 0; - ecryptfs_release_lower_page(lower_page, page_locked); - return rc; + return ecryptfs_write_inode_size_to_header(ecryptfs_inode); } /** - * ecryptfs_copy_page_to_lower - * - * Used for plaintext pass-through; no page index interpolation - * required. - */ -int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, - struct file *lower_file) -{ - int rc = 0; - struct page *lower_page; - - rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file, - page->index, 0, PAGE_CACHE_SIZE); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to get page " - "at index [0x%.16x]\n", page->index); - goto out; - } - /* TODO: aops */ - memcpy((char *)page_address(lower_page), page_address(page), - PAGE_CACHE_SIZE); - rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file, - 0, PAGE_CACHE_SIZE); - if (rc) - ecryptfs_printk(KERN_ERR, "Error attempting to commit page " - "at index [0x%.16x]\n", page->index); -out: - return rc; -} - -struct kmem_cache *ecryptfs_xattr_cache; - -/** * ecryptfs_commit_write * @file: The eCryptfs file object * @page: The eCryptfs page @@ -695,20 +404,12 @@ struct kmem_cache *ecryptfs_xattr_cache; static int ecryptfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - struct ecryptfs_page_crypt_context ctx; loff_t pos; - struct inode *inode; - struct inode *lower_inode; - struct file *lower_file; - struct ecryptfs_crypt_stat *crypt_stat; + struct inode *ecryptfs_inode = page->mapping->host; + struct ecryptfs_crypt_stat *crypt_stat = + &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; int rc; - inode = page->mapping->host; - lower_inode = ecryptfs_inode_to_lower(inode); - lower_file = ecryptfs_file_to_lower(file); - mutex_lock(&lower_inode->i_mutex); - crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode) - ->crypt_stat; if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in " "crypt_stat at memory location [%p]\n", crypt_stat); @@ -718,6 +419,7 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" "(page w/ index = [0x%.16x], to = [%d])\n", page->index, to); + /* Fills in zeros if 'to' goes beyond inode size */ rc = fill_zeros_to_end_of_page(page, to); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to fill " @@ -725,82 +427,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, page->index); goto out; } - ctx.page = page; - ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE; - ctx.param.lower_file = lower_file; - rc = ecryptfs_encrypt_page(&ctx); + rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " "index [0x%.16x])\n", page->index); goto out; } - inode->i_blocks = lower_inode->i_blocks; - pos = page_offset(page) + to; - if (pos > i_size_read(inode)) { - i_size_write(inode, pos); + pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to; + if (pos > i_size_read(ecryptfs_inode)) { + i_size_write(ecryptfs_inode, pos); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " - "[0x%.16x]\n", i_size_read(inode)); + "[0x%.16x]\n", i_size_read(ecryptfs_inode)); } - rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, - inode, file->f_dentry, - ECRYPTFS_LOWER_I_MUTEX_HELD); + rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); if (rc) printk(KERN_ERR "Error writing inode size to metadata; " "rc = [%d]\n", rc); - lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(inode); -out: - if (rc < 0) - ClearPageUptodate(page); - else - SetPageUptodate(page); - mutex_unlock(&lower_inode->i_mutex); - return rc; -} - -/** - * ecryptfs_write_zeros - * @file: The ecryptfs file - * @index: The index in which we are writing - * @start: The position after the last block of data - * @num_zeros: The number of zeros to write - * - * Write a specified number of zero's to a page. - * - * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE - */ -int -ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros) -{ - int rc = 0; - struct page *tmp_page; - - tmp_page = ecryptfs_get1page(file, index); - if (IS_ERR(tmp_page)) { - ecryptfs_printk(KERN_ERR, "Error getting page at index " - "[0x%.16x]\n", index); - rc = PTR_ERR(tmp_page); - goto out; - } - if ((rc = ecryptfs_prepare_write_no_truncate(file, tmp_page, start, - (start + num_zeros)))) { - ecryptfs_printk(KERN_ERR, "Error preparing to write zero's " - "to page at index [0x%.16x]\n", - index); - page_cache_release(tmp_page); - goto out; - } - zero_user_page(tmp_page, start, num_zeros, KM_USER0); - rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); - if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " - "to remainder of page at index [0x%.16x]\n", - index); - page_cache_release(tmp_page); - goto out; - } - rc = 0; - page_cache_release(tmp_page); out: return rc; } @@ -819,34 +461,10 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) return rc; } -static void ecryptfs_sync_page(struct page *page) -{ - struct inode *inode; - struct inode *lower_inode; - struct page *lower_page; - - inode = page->mapping->host; - lower_inode = ecryptfs_inode_to_lower(inode); - /* NOTE: Recently swapped with grab_cache_page(), since - * sync_page() just makes sure that pending I/O gets done. */ - lower_page = find_lock_page(lower_inode->i_mapping, page->index); - if (!lower_page) { - ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n"); - return; - } - if (lower_page->mapping->a_ops->sync_page) - lower_page->mapping->a_ops->sync_page(lower_page); - ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", - lower_page->index); - unlock_page(lower_page); - page_cache_release(lower_page); -} - struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, .prepare_write = ecryptfs_prepare_write, .commit_write = ecryptfs_commit_write, .bmap = ecryptfs_bmap, - .sync_page = ecryptfs_sync_page, }; diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c new file mode 100644 index 00000000000..2150edf9a58 --- /dev/null +++ b/fs/ecryptfs/read_write.c @@ -0,0 +1,358 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 2007 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include <linux/fs.h> +#include <linux/pagemap.h> +#include "ecryptfs_kernel.h" + +/** + * ecryptfs_write_lower + * @ecryptfs_inode: The eCryptfs inode + * @data: Data to write + * @offset: Byte offset in the lower file to which to write the data + * @size: Number of bytes from @data to write at @offset in the lower + * file + * + * Write data to the lower file. + * + * Returns zero on success; non-zero on error + */ +int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, + loff_t offset, size_t size) +{ + struct ecryptfs_inode_info *inode_info; + ssize_t octets_written; + mm_segment_t fs_save; + int rc = 0; + + inode_info = ecryptfs_inode_to_private(ecryptfs_inode); + mutex_lock(&inode_info->lower_file_mutex); + BUG_ON(!inode_info->lower_file); + inode_info->lower_file->f_pos = offset; + fs_save = get_fs(); + set_fs(get_ds()); + octets_written = vfs_write(inode_info->lower_file, data, size, + &inode_info->lower_file->f_pos); + set_fs(fs_save); + if (octets_written < 0) { + printk(KERN_ERR "%s: octets_written = [%td]; " + "expected [%td]\n", __FUNCTION__, octets_written, size); + rc = -EINVAL; + } + mutex_unlock(&inode_info->lower_file_mutex); + mark_inode_dirty_sync(ecryptfs_inode); + return rc; +} + +/** + * ecryptfs_write_lower_page_segment + * @ecryptfs_inode: The eCryptfs inode + * @page_for_lower: The page containing the data to be written to the + * lower file + * @offset_in_page: The offset in the @page_for_lower from which to + * start writing the data + * @size: The amount of data from @page_for_lower to write to the + * lower file + * + * Determines the byte offset in the file for the given page and + * offset within the page, maps the page, and makes the call to write + * the contents of @page_for_lower to the lower inode. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, + struct page *page_for_lower, + size_t offset_in_page, size_t size) +{ + char *virt; + loff_t offset; + int rc; + + offset = ((((off_t)page_for_lower->index) << PAGE_CACHE_SHIFT) + + offset_in_page); + virt = kmap(page_for_lower); + rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size); + kunmap(page_for_lower); + return rc; +} + +/** + * ecryptfs_write + * @ecryptfs_file: The eCryptfs file into which to write + * @data: Virtual address where data to write is located + * @offset: Offset in the eCryptfs file at which to begin writing the + * data from @data + * @size: The number of bytes to write from @data + * + * Write an arbitrary amount of data to an arbitrary location in the + * eCryptfs inode page cache. This is done on a page-by-page, and then + * by an extent-by-extent, basis; individual extents are encrypted and + * written to the lower page cache (via VFS writes). This function + * takes care of all the address translation to locations in the lower + * filesystem; it also handles truncate events, writing out zeros + * where necessary. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, + size_t size) +{ + struct page *ecryptfs_page; + char *ecryptfs_page_virt; + loff_t ecryptfs_file_size = + i_size_read(ecryptfs_file->f_dentry->d_inode); + loff_t data_offset = 0; + loff_t pos; + int rc = 0; + + if (offset > ecryptfs_file_size) + pos = ecryptfs_file_size; + else + pos = offset; + while (pos < (offset + size)) { + pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); + size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); + size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); + size_t total_remaining_bytes = ((offset + size) - pos); + + if (num_bytes > total_remaining_bytes) + num_bytes = total_remaining_bytes; + if (pos < offset) { + size_t total_remaining_zeros = (offset - pos); + + if (num_bytes > total_remaining_zeros) + num_bytes = total_remaining_zeros; + } + ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, + ecryptfs_page_idx); + if (IS_ERR(ecryptfs_page)) { + rc = PTR_ERR(ecryptfs_page); + printk(KERN_ERR "%s: Error getting page at " + "index [%ld] from eCryptfs inode " + "mapping; rc = [%d]\n", __FUNCTION__, + ecryptfs_page_idx, rc); + goto out; + } + if (start_offset_in_page) { + /* Read in the page from the lower + * into the eCryptfs inode page cache, + * decrypting */ + rc = ecryptfs_decrypt_page(ecryptfs_page); + if (rc) { + printk(KERN_ERR "%s: Error decrypting " + "page; rc = [%d]\n", + __FUNCTION__, rc); + ClearPageUptodate(ecryptfs_page); + page_cache_release(ecryptfs_page); + goto out; + } + } + ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); + if (pos >= offset) { + memcpy(((char *)ecryptfs_page_virt + + start_offset_in_page), + (data + data_offset), num_bytes); + data_offset += num_bytes; + } else { + /* We are extending past the previous end of the file. + * Fill in zero values up to the start of where we + * will be writing data. */ + memset(((char *)ecryptfs_page_virt + + start_offset_in_page), 0, num_bytes); + } + kunmap_atomic(ecryptfs_page_virt, KM_USER0); + flush_dcache_page(ecryptfs_page); + SetPageUptodate(ecryptfs_page); + unlock_page(ecryptfs_page); + rc = ecryptfs_encrypt_page(ecryptfs_page); + page_cache_release(ecryptfs_page); + if (rc) { + printk(KERN_ERR "%s: Error encrypting " + "page; rc = [%d]\n", __FUNCTION__, rc); + goto out; + } + pos += num_bytes; + } + if ((offset + size) > ecryptfs_file_size) { + i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size)); + rc = ecryptfs_write_inode_size_to_metadata( + ecryptfs_file->f_dentry->d_inode); + if (rc) { + printk(KERN_ERR "Problem with " + "ecryptfs_write_inode_size_to_metadata; " + "rc = [%d]\n", rc); + goto out; + } + } +out: + return rc; +} + +/** + * ecryptfs_read_lower + * @data: The read data is stored here by this function + * @offset: Byte offset in the lower file from which to read the data + * @size: Number of bytes to read from @offset of the lower file and + * store into @data + * @ecryptfs_inode: The eCryptfs inode + * + * Read @size bytes of data at byte offset @offset from the lower + * inode into memory location @data. + * + * Returns zero on success; non-zero on error + */ +int ecryptfs_read_lower(char *data, loff_t offset, size_t size, + struct inode *ecryptfs_inode) +{ + struct ecryptfs_inode_info *inode_info = + ecryptfs_inode_to_private(ecryptfs_inode); + ssize_t octets_read; + mm_segment_t fs_save; + int rc = 0; + + mutex_lock(&inode_info->lower_file_mutex); + BUG_ON(!inode_info->lower_file); + inode_info->lower_file->f_pos = offset; + fs_save = get_fs(); + set_fs(get_ds()); + octets_read = vfs_read(inode_info->lower_file, data, size, + &inode_info->lower_file->f_pos); + set_fs(fs_save); + if (octets_read < 0) { + printk(KERN_ERR "%s: octets_read = [%td]; " + "expected [%td]\n", __FUNCTION__, octets_read, size); + rc = -EINVAL; + } + mutex_unlock(&inode_info->lower_file_mutex); + return rc; +} + +/** + * ecryptfs_read_lower_page_segment + * @page_for_ecryptfs: The page into which data for eCryptfs will be + * written + * @offset_in_page: Offset in @page_for_ecryptfs from which to start + * writing + * @size: The number of bytes to write into @page_for_ecryptfs + * @ecryptfs_inode: The eCryptfs inode + * + * Determines the byte offset in the file for the given page and + * offset within the page, maps the page, and makes the call to read + * the contents of @page_for_ecryptfs from the lower inode. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, + pgoff_t page_index, + size_t offset_in_page, size_t size, + struct inode *ecryptfs_inode) +{ + char *virt; + loff_t offset; + int rc; + + offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page); + virt = kmap(page_for_ecryptfs); + rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode); + kunmap(page_for_ecryptfs); + flush_dcache_page(page_for_ecryptfs); + return rc; +} + +/** + * ecryptfs_read + * @data: The virtual address into which to write the data read (and + * possibly decrypted) from the lower file + * @offset: The offset in the decrypted view of the file from which to + * read into @data + * @size: The number of bytes to read into @data + * @ecryptfs_file: The eCryptfs file from which to read + * + * Read an arbitrary amount of data from an arbitrary location in the + * eCryptfs page cache. This is done on an extent-by-extent basis; + * individual extents are decrypted and read from the lower page + * cache (via VFS reads). This function takes care of all the + * address translation to locations in the lower filesystem. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_read(char *data, loff_t offset, size_t size, + struct file *ecryptfs_file) +{ + struct page *ecryptfs_page; + char *ecryptfs_page_virt; + loff_t ecryptfs_file_size = + i_size_read(ecryptfs_file->f_dentry->d_inode); + loff_t data_offset = 0; + loff_t pos; + int rc = 0; + + if ((offset + size) > ecryptfs_file_size) { + rc = -EINVAL; + printk(KERN_ERR "%s: Attempt to read data past the end of the " + "file; offset = [%lld]; size = [%td]; " + "ecryptfs_file_size = [%lld]\n", + __FUNCTION__, offset, size, ecryptfs_file_size); + goto out; + } + pos = offset; + while (pos < (offset + size)) { + pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); + size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); + size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); + size_t total_remaining_bytes = ((offset + size) - pos); + + if (num_bytes > total_remaining_bytes) + num_bytes = total_remaining_bytes; + ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file, + ecryptfs_page_idx); + if (IS_ERR(ecryptfs_page)) { + rc = PTR_ERR(ecryptfs_page); + printk(KERN_ERR "%s: Error getting page at " + "index [%ld] from eCryptfs inode " + "mapping; rc = [%d]\n", __FUNCTION__, + ecryptfs_page_idx, rc); + goto out; + } + rc = ecryptfs_decrypt_page(ecryptfs_page); + if (rc) { + printk(KERN_ERR "%s: Error decrypting " + "page; rc = [%d]\n", __FUNCTION__, rc); + ClearPageUptodate(ecryptfs_page); + page_cache_release(ecryptfs_page); + goto out; + } + ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); + memcpy((data + data_offset), + ((char *)ecryptfs_page_virt + start_offset_in_page), + num_bytes); + kunmap_atomic(ecryptfs_page_virt, KM_USER0); + flush_dcache_page(ecryptfs_page); + SetPageUptodate(ecryptfs_page); + unlock_page(ecryptfs_page); + page_cache_release(ecryptfs_page); + pos += num_bytes; + data_offset += num_bytes; + } +out: + return rc; +} diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 7b3f0cc09a6..f8cdab2bee3 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -27,6 +27,7 @@ #include <linux/mount.h> #include <linux/key.h> #include <linux/seq_file.h> +#include <linux/file.h> #include <linux/crypto.h> #include "ecryptfs_kernel.h" @@ -46,15 +47,16 @@ struct kmem_cache *ecryptfs_inode_info_cache; */ static struct inode *ecryptfs_alloc_inode(struct super_block *sb) { - struct ecryptfs_inode_info *ecryptfs_inode; + struct ecryptfs_inode_info *inode_info; struct inode *inode = NULL; - ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache, - GFP_KERNEL); - if (unlikely(!ecryptfs_inode)) + inode_info = kmem_cache_alloc(ecryptfs_inode_info_cache, GFP_KERNEL); + if (unlikely(!inode_info)) goto out; - ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat); - inode = &ecryptfs_inode->vfs_inode; + ecryptfs_init_crypt_stat(&inode_info->crypt_stat); + mutex_init(&inode_info->lower_file_mutex); + inode_info->lower_file = NULL; + inode = &inode_info->vfs_inode; out: return inode; } @@ -63,9 +65,10 @@ out: * ecryptfs_destroy_inode * @inode: The ecryptfs inode * - * This is used during the final destruction of the inode. - * All allocation of memory related to the inode, including allocated - * memory in the crypt_stat struct, will be released here. + * This is used during the final destruction of the inode. All + * allocation of memory related to the inode, including allocated + * memory in the crypt_stat struct, will be released here. This + * function also fput()'s the persistent file for the lower inode. * There should be no chance that this deallocation will be missed. */ static void ecryptfs_destroy_inode(struct inode *inode) @@ -73,7 +76,21 @@ static void ecryptfs_destroy_inode(struct inode *inode) struct ecryptfs_inode_info *inode_info; inode_info = ecryptfs_inode_to_private(inode); - ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat); + mutex_lock(&inode_info->lower_file_mutex); + if (inode_info->lower_file) { + struct dentry *lower_dentry = + inode_info->lower_file->f_dentry; + + BUG_ON(!lower_dentry); + if (lower_dentry->d_inode) { + fput(inode_info->lower_file); + inode_info->lower_file = NULL; + d_drop(lower_dentry); + d_delete(lower_dentry); + } + } + mutex_unlock(&inode_info->lower_file_mutex); + ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); kmem_cache_free(ecryptfs_inode_info_cache, inode_info); } @@ -104,7 +121,7 @@ static void ecryptfs_put_super(struct super_block *sb) { struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); - ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat); + ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); ecryptfs_set_superblock_private(sb, NULL); } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2bf49d7ef84..05d9342bb64 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -22,7 +22,9 @@ */ #include "ext2.h" +#include <linux/buffer_head.h> #include <linux/pagemap.h> +#include <linux/swap.h> typedef struct ext2_dir_entry_2 ext2_dirent; @@ -61,16 +63,25 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) +static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; + dir->i_version++; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); + + if (pos+len > dir->i_size) { + i_size_write(dir, pos+len); + mark_inode_dirty(dir); + } + if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else unlock_page(page); + return err; } @@ -412,16 +423,18 @@ ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, struct page *page, struct inode *inode) { - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + le16_to_cpu(de->rec_len); + loff_t pos = page_offset(page) + + (char *) de - (char *) page_address(page); + unsigned len = le16_to_cpu(de->rec_len); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ext2_write_begin(NULL, page->mapping, pos, len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - err = ext2_commit_chunk(page, from, to); + ext2_set_de_type(de, inode); + err = ext2_commit_chunk(page, pos, len); ext2_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; @@ -444,7 +457,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; /* @@ -497,9 +510,10 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = page_offset(page) + + (char*)de - (char*)page_address(page); + err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0, + &page, NULL); if (err) goto out_unlock; if (de->inode) { @@ -509,10 +523,10 @@ got_it: de = de1; } de->name_len = namelen; - memcpy (de->name, name, namelen); + memcpy(de->name, name, namelen); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); - err = ext2_commit_chunk(page, from, to); + err = ext2_commit_chunk(page, pos, rec_len); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); @@ -537,6 +551,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + loff_t pos; ext2_dirent * pde = NULL; ext2_dirent * de = (ext2_dirent *) (kaddr + from); int err; @@ -553,13 +568,15 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) } if (pde) from = (char*)pde - (char*)page_address(page); + pos = page_offset(page) + from; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0, + &page, NULL); BUG_ON(err); if (pde) - pde->rec_len = cpu_to_le16(to-from); + pde->rec_len = cpu_to_le16(to - from); dir->inode = 0; - err = ext2_commit_chunk(page, from, to); + err = ext2_commit_chunk(page, pos, to - from); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); @@ -582,7 +599,9 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) if (!page) return -ENOMEM; - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + + err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0, + &page, NULL); if (err) { unlock_page(page); goto fail; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 9fd0ec5ba0d..a08052d2c00 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -134,6 +134,9 @@ extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); extern void ext2_get_inode_flags(struct ext2_inode_info *); +int __ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); /* ioctl.c */ extern int ext2_ioctl (struct inode *, struct file *, unsigned int, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 0079b2cd531..1b102a1cceb 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -642,18 +642,35 @@ ext2_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); } +int __ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext2_get_block); +} + static int -ext2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,ext2_get_block); + *pagep = NULL; + return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); } static int -ext2_nobh_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +ext2_nobh_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return nobh_prepare_write(page,from,to,ext2_get_block); + /* + * Dir-in-pagecache still uses ext2_write_begin. Would have to rework + * directory handling code to pass around offsets rather than struct + * pages in order to make this work easily. + */ + return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext2_get_block); } static int ext2_nobh_writepage(struct page *page, @@ -689,8 +706,8 @@ const struct address_space_operations ext2_aops = { .readpages = ext2_readpages, .writepage = ext2_writepage, .sync_page = block_sync_page, - .prepare_write = ext2_prepare_write, - .commit_write = generic_commit_write, + .write_begin = ext2_write_begin, + .write_end = generic_write_end, .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, @@ -707,8 +724,8 @@ const struct address_space_operations ext2_nobh_aops = { .readpages = ext2_readpages, .writepage = ext2_nobh_writepage, .sync_page = block_sync_page, - .prepare_write = ext2_nobh_prepare_write, - .commit_write = nobh_commit_write, + .write_begin = ext2_nobh_write_begin, + .write_end = nobh_write_end, .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, @@ -925,7 +942,8 @@ void ext2_truncate (struct inode * inode) if (mapping_is_xip(inode->i_mapping)) xip_truncate_page(inode->i_mapping, inode->i_size); else if (test_opt(inode->i_sb, NOBH)) - nobh_truncate_page(inode->i_mapping, inode->i_size); + nobh_truncate_page(inode->i_mapping, + inode->i_size, ext2_get_block); else block_truncate_page(inode->i_mapping, inode->i_size, ext2_get_block); diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index c00723a99f4..c2c3491b18c 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -143,7 +143,7 @@ static int ext3_readdir(struct file * filp, sb->s_bdev->bd_inode->i_mapping, &filp->f_ra, filp, index, 1); - filp->f_ra.prev_index = index; + filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; bh = ext3_bread(NULL, inode, blk, 0, &err); } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index de4e3161e47..2f2b6864db1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1147,51 +1147,68 @@ static int do_journal_get_write_access(handle_t *handle, return ext3_journal_get_write_access(handle, bh); } -static int ext3_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext3_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret, needed_blocks = ext3_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; + struct page *page; + pgoff_t index; + unsigned from, to; + + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; retry: + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + handle = ext3_journal_start(inode, needed_blocks); if (IS_ERR(handle)) { + unlock_page(page); + page_cache_release(page); ret = PTR_ERR(handle); goto out; } - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_prepare_write(page, from, to, ext3_get_block); - else - ret = block_prepare_write(page, from, to, ext3_get_block); + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext3_get_block); if (ret) - goto prepare_write_failed; + goto write_begin_failed; if (ext3_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); } -prepare_write_failed: - if (ret) +write_begin_failed: + if (ret) { ext3_journal_stop(handle); + unlock_page(page); + page_cache_release(page); + } if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; out: return ret; } + int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { int err = journal_dirty_data(handle, bh); if (err) ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, - bh, handle,err); + bh, handle, err); return err; } -/* For commit_write() in data=journal mode */ -static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +/* For write_end() in data=journal mode */ +static int write_end_fn(handle_t *handle, struct buffer_head *bh) { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; @@ -1200,84 +1217,130 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh) } /* + * Generic write_end handler for ordered and writeback ext3 journal modes. + * We can't use generic_write_end, because that unlocks the page and we need to + * unlock the page after ext3_journal_stop, but ext3_journal_stop must run + * after block_write_end. + */ +static int ext3_generic_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = file->f_mapping->host; + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + if (pos+copied > inode->i_size) { + i_size_write(inode, pos+copied); + mark_inode_dirty(inode); + } + + return copied; +} + +/* * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). * * ext3 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. */ -static int ext3_ordered_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext3_ordered_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; + unsigned from, to; int ret = 0, ret2; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, ext3_journal_dirty_data); if (ret == 0) { /* - * generic_commit_write() will run mark_inode_dirty() if i_size + * generic_write_end() will run mark_inode_dirty() if i_size * changes. So let's piggyback the i_disksize mark_inode_dirty * into that. */ loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - ret = generic_commit_write(file, page, from, to); + copied = ext3_generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; } ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; - return ret; + unlock_page(page); + page_cache_release(page); + + return ret ? ret : copied; } -static int ext3_writeback_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext3_writeback_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; int ret = 0, ret2; loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_commit_write(file, page, from, to); - else - ret = generic_commit_write(file, page, from, to); + copied = ext3_generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; - return ret; + unlock_page(page); + page_cache_release(page); + + return ret ? ret : copied; } -static int ext3_journalled_commit_write(struct file *file, - struct page *page, unsigned from, unsigned to) +static int ext3_journalled_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret = 0, ret2; int partial = 0; - loff_t pos; + unsigned from, to; - /* - * Here we duplicate the generic_commit_write() functionality - */ - pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + if (copied < len) { + if (!PageUptodate(page)) + copied = 0; + page_zero_new_buffers(page, from+copied, to); + } ret = walk_page_buffers(handle, page_buffers(page), from, - to, &partial, commit_write_fn); + to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos > inode->i_size) - i_size_write(inode, pos); + if (pos+copied > inode->i_size) + i_size_write(inode, pos+copied); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; @@ -1285,10 +1348,14 @@ static int ext3_journalled_commit_write(struct file *file, if (!ret) ret = ret2; } + ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; - return ret; + unlock_page(page); + page_cache_release(page); + + return ret ? ret : copied; } /* @@ -1546,7 +1613,7 @@ static int ext3_journalled_writepage(struct page *page, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); err = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, commit_write_fn); + PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; @@ -1706,8 +1773,8 @@ static const struct address_space_operations ext3_ordered_aops = { .readpages = ext3_readpages, .writepage = ext3_ordered_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_ordered_commit_write, + .write_begin = ext3_write_begin, + .write_end = ext3_ordered_write_end, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, @@ -1720,8 +1787,8 @@ static const struct address_space_operations ext3_writeback_aops = { .readpages = ext3_readpages, .writepage = ext3_writeback_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_writeback_commit_write, + .write_begin = ext3_write_begin, + .write_end = ext3_writeback_write_end, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, @@ -1734,8 +1801,8 @@ static const struct address_space_operations ext3_journalled_aops = { .readpages = ext3_readpages, .writepage = ext3_journalled_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_journalled_commit_write, + .write_begin = ext3_write_begin, + .write_end = ext3_journalled_write_end, .set_page_dirty = ext3_journalled_set_page_dirty, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 3ab01c04e00..e11890acfa2 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -142,7 +142,7 @@ static int ext4_readdir(struct file * filp, sb->s_bdev->bd_inode->i_mapping, &filp->f_ra, filp, index, 1); - filp->f_ra.prev_index = index; + filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; bh = ext4_bread(NULL, inode, blk, 0, &err); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a4848e04a5e..0df2b1e06d0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1146,34 +1146,50 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } -static int ext4_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext4_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; + struct page *page; + pgoff_t index; + unsigned from, to; + + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; retry: - handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + + handle = ext4_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + unlock_page(page); + page_cache_release(page); + ret = PTR_ERR(handle); + goto out; } - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_prepare_write(page, from, to, ext4_get_block); - else - ret = block_prepare_write(page, from, to, ext4_get_block); - if (ret) - goto prepare_write_failed; - if (ext4_should_journal_data(inode)) { + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext4_get_block); + + if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); } -prepare_write_failed: - if (ret) + + if (ret) { ext4_journal_stop(handle); + unlock_page(page); + page_cache_release(page); + } + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; out: @@ -1185,12 +1201,12 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) int err = jbd2_journal_dirty_data(handle, bh); if (err) ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, - bh, handle,err); + bh, handle, err); return err; } -/* For commit_write() in data=journal mode */ -static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +/* For write_end() in data=journal mode */ +static int write_end_fn(handle_t *handle, struct buffer_head *bh) { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; @@ -1199,84 +1215,130 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh) } /* + * Generic write_end handler for ordered and writeback ext4 journal modes. + * We can't use generic_write_end, because that unlocks the page and we need to + * unlock the page after ext4_journal_stop, but ext4_journal_stop must run + * after block_write_end. + */ +static int ext4_generic_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = file->f_mapping->host; + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + if (pos+copied > inode->i_size) { + i_size_write(inode, pos+copied); + mark_inode_dirty(inode); + } + + return copied; +} + +/* * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). * * ext4 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. */ -static int ext4_ordered_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext4_ordered_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; + unsigned from, to; int ret = 0, ret2; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, ext4_journal_dirty_data); if (ret == 0) { /* - * generic_commit_write() will run mark_inode_dirty() if i_size + * generic_write_end() will run mark_inode_dirty() if i_size * changes. So let's piggyback the i_disksize mark_inode_dirty * into that. */ loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - ret = generic_commit_write(file, page, from, to); + copied = ext4_generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; } ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - return ret; + unlock_page(page); + page_cache_release(page); + + return ret ? ret : copied; } -static int ext4_writeback_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext4_writeback_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; int ret = 0, ret2; loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_commit_write(file, page, from, to); - else - ret = generic_commit_write(file, page, from, to); + copied = ext4_generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - return ret; + unlock_page(page); + page_cache_release(page); + + return ret ? ret : copied; } -static int ext4_journalled_commit_write(struct file *file, - struct page *page, unsigned from, unsigned to) +static int ext4_journalled_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret = 0, ret2; int partial = 0; - loff_t pos; + unsigned from, to; - /* - * Here we duplicate the generic_commit_write() functionality - */ - pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + if (copied < len) { + if (!PageUptodate(page)) + copied = 0; + page_zero_new_buffers(page, from+copied, to); + } ret = walk_page_buffers(handle, page_buffers(page), from, - to, &partial, commit_write_fn); + to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos > inode->i_size) - i_size_write(inode, pos); + if (pos+copied > inode->i_size) + i_size_write(inode, pos+copied); EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; if (inode->i_size > EXT4_I(inode)->i_disksize) { EXT4_I(inode)->i_disksize = inode->i_size; @@ -1284,10 +1346,14 @@ static int ext4_journalled_commit_write(struct file *file, if (!ret) ret = ret2; } + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - return ret; + unlock_page(page); + page_cache_release(page); + + return ret ? ret : copied; } /* @@ -1545,7 +1611,7 @@ static int ext4_journalled_writepage(struct page *page, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); err = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, commit_write_fn); + PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; @@ -1705,8 +1771,8 @@ static const struct address_space_operations ext4_ordered_aops = { .readpages = ext4_readpages, .writepage = ext4_ordered_writepage, .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_ordered_commit_write, + .write_begin = ext4_write_begin, + .write_end = ext4_ordered_write_end, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, @@ -1719,8 +1785,8 @@ static const struct address_space_operations ext4_writeback_aops = { .readpages = ext4_readpages, .writepage = ext4_writeback_writepage, .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_writeback_commit_write, + .write_begin = ext4_write_begin, + .write_end = ext4_writeback_write_end, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, @@ -1733,8 +1799,8 @@ static const struct address_space_operations ext4_journalled_aops = { .readpages = ext4_readpages, .writepage = ext4_journalled_writepage, .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_journalled_commit_write, + .write_begin = ext4_write_begin, + .write_end = ext4_journalled_write_end, .set_page_dirty = ext4_journalled_set_page_dirty, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 4baa5f20536..46b8a67f55c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -141,19 +141,24 @@ static int fat_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, fat_get_block); } -static int fat_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int fat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, fat_get_block, - &MSDOS_I(page->mapping->host)->mmu_private); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + fat_get_block, + &MSDOS_I(mapping->host)->mmu_private); } -static int fat_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int fat_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pagep, void *fsdata) { - struct inode *inode = page->mapping->host; - int err = generic_commit_write(file, page, from, to); - if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { + struct inode *inode = mapping->host; + int err; + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; MSDOS_I(inode)->i_attrs |= ATTR_ARCH; mark_inode_dirty(inode); @@ -202,8 +207,8 @@ static const struct address_space_operations fat_aops = { .writepage = fat_writepage, .writepages = fat_writepages, .sync_page = block_sync_page, - .prepare_write = fat_prepare_write, - .commit_write = fat_commit_write, + .write_begin = fat_write_begin, + .write_end = fat_write_end, .direct_IO = fat_direct_IO, .bmap = _fat_bmap }; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f79de7c8cdf..11f22a3d728 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -444,22 +444,25 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, return outarg.size; } -static int fuse_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int fuse_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - /* No op */ + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + *pagep = __grab_cache_page(mapping, index); + if (!*pagep) + return -ENOMEM; return 0; } -static int fuse_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int fuse_buffered_write(struct file *file, struct inode *inode, + loff_t pos, unsigned count, struct page *page) { int err; size_t nres; - unsigned count = to - offset; - struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - loff_t pos = page_offset(page) + offset; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); struct fuse_req *req; if (is_bad_inode(inode)) @@ -475,20 +478,35 @@ static int fuse_commit_write(struct file *file, struct page *page, nres = fuse_send_write(req, file, inode, pos, count); err = req->out.h.error; fuse_put_request(fc, req); - if (!err && nres != count) + if (!err && !nres) err = -EIO; if (!err) { - pos += count; + pos += nres; spin_lock(&fc->lock); if (pos > inode->i_size) i_size_write(inode, pos); spin_unlock(&fc->lock); - if (offset == 0 && to == PAGE_CACHE_SIZE) + if (count == PAGE_CACHE_SIZE) SetPageUptodate(page); } fuse_invalidate_attr(inode); - return err; + return err ? err : nres; +} + +static int fuse_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + int res = 0; + + if (copied) + res = fuse_buffered_write(file, inode, pos, copied, page); + + unlock_page(page); + page_cache_release(page); + return res; } static void fuse_release_user_pages(struct fuse_req *req, int write) @@ -819,8 +837,8 @@ static const struct file_operations fuse_direct_io_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, - .prepare_write = fuse_prepare_write, - .commit_write = fuse_commit_write, + .write_begin = fuse_write_begin, + .write_end = fuse_write_end, .readpages = fuse_readpages, .set_page_dirty = fuse_set_page_dirty, .bmap = fuse_bmap, diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 873a511ef2b..9679f8b9870 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -17,6 +17,7 @@ #include <linux/mpage.h> #include <linux/fs.h> #include <linux/writeback.h> +#include <linux/swap.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> @@ -349,45 +350,49 @@ out_unlock: } /** - * gfs2_prepare_write - Prepare to write a page to a file + * gfs2_write_begin - Begin to write to a file * @file: The file to write to - * @page: The page which is to be prepared for writing - * @from: From (byte range within page) - * @to: To (byte range within page) + * @mapping: The mapping in which to write + * @pos: The file offset at which to start writing + * @len: Length of the write + * @flags: Various flags + * @pagep: Pointer to return the page + * @fsdata: Pointer to return fs data (unused by GFS2) * * Returns: errno */ -static int gfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int gfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); unsigned int data_blocks, ind_blocks, rblocks; int alloc_required; int error = 0; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from; - loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; struct gfs2_alloc *al; - unsigned int write_len = to - from; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + len; + struct page *page; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); error = gfs2_glock_nq_atime(&ip->i_gh); - if (unlikely(error)) { - if (error == GLR_TRYFAILED) { - unlock_page(page); - error = AOP_TRUNCATED_PAGE; - yield(); - } + if (unlikely(error)) goto out_uninit; - } - gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks); + error = -ENOMEM; + page = __grab_cache_page(mapping, index); + *pagep = page; + if (!page) + goto out_unlock; + + gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required); + error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); if (error) - goto out_unlock; + goto out_putpage; ip->i_alloc.al_requested = 0; @@ -420,7 +425,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page, goto out_trans_fail; if (gfs2_is_stuffed(ip)) { - if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { error = gfs2_unstuff_dinode(ip, page); if (error == 0) goto prepare_write; @@ -443,6 +448,10 @@ out_qunlock: out_alloc_put: gfs2_alloc_put(ip); } +out_putpage: + page_cache_release(page); + if (pos + len > ip->i_inode.i_size) + vmtruncate(&ip->i_inode, ip->i_inode.i_size); out_unlock: gfs2_glock_dq_m(1, &ip->i_gh); out_uninit: @@ -478,65 +487,117 @@ static void adjust_fs_space(struct inode *inode) } /** - * gfs2_commit_write - Commit write to a file + * gfs2_stuffed_write_end - Write end for stuffed files + * @inode: The inode + * @dibh: The buffer_head containing the on-disk inode + * @pos: The file position + * @len: The length of the write + * @copied: How much was actually copied by the VFS + * @page: The page + * + * This copies the data from the page into the inode block after + * the inode data structure itself. + * + * Returns: errno + */ +static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned len, unsigned copied, + struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + u64 to = pos + copied; + void *kaddr; + unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + + BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); + kaddr = kmap_atomic(page, KM_USER0); + memcpy(buf + pos, kaddr + pos, copied); + memset(kaddr + pos + copied, 0, len - copied); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); + + if (inode->i_size < to) { + i_size_write(inode, to); + ip->i_di.di_size = inode->i_size; + di->di_size = cpu_to_be64(inode->i_size); + mark_inode_dirty(inode); + } + + if (inode == sdp->sd_rindex) + adjust_fs_space(inode); + + brelse(dibh); + gfs2_trans_end(sdp); + gfs2_glock_dq(&ip->i_gh); + gfs2_holder_uninit(&ip->i_gh); + return copied; +} + +/** + * gfs2_write_end * @file: The file to write to - * @page: The page containing the data - * @from: From (byte range within page) - * @to: To (byte range within page) + * @mapping: The address space to write to + * @pos: The file position + * @len: The length of the data + * @copied: + * @page: The page that has been written + * @fsdata: The fsdata (unused in GFS2) + * + * The main write_end function for GFS2. We have a separate one for + * stuffed files as they are slightly different, otherwise we just + * put our locking around the VFS provided functions. * * Returns: errno */ -static int gfs2_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int gfs2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - int error = -EOPNOTSUPP; struct buffer_head *dibh; struct gfs2_alloc *al = &ip->i_alloc; struct gfs2_dinode *di; + unsigned int from = pos & (PAGE_CACHE_SIZE - 1); + unsigned int to = from + len; + int ret; - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) - goto fail_nounlock; + BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0); - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_endtrans; + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(ret)) { + unlock_page(page); + page_cache_release(page); + goto failed; + } gfs2_trans_add_bh(ip->i_gl, dibh, 1); - di = (struct gfs2_dinode *)dibh->b_data; - - if (gfs2_is_stuffed(ip)) { - u64 file_size; - void *kaddr; - file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to; + if (gfs2_is_stuffed(ip)) + return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); - kaddr = kmap_atomic(page, KM_USER0); - memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, - kaddr + from, to - from); - kunmap_atomic(kaddr, KM_USER0); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + gfs2_page_add_databufs(ip, page, from, to); - SetPageUptodate(page); + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (inode->i_size < file_size) { - i_size_write(inode, file_size); + if (likely(ret >= 0)) { + copied = ret; + if ((pos + copied) > inode->i_size) { + di = (struct gfs2_dinode *)dibh->b_data; + ip->i_di.di_size = inode->i_size; + di->di_size = cpu_to_be64(inode->i_size); mark_inode_dirty(inode); } - } else { - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || - gfs2_is_jdata(ip)) - gfs2_page_add_databufs(ip, page, from, to); - error = generic_commit_write(file, page, from, to); - if (error) - goto fail; - } - - if (ip->i_di.di_size < inode->i_size) { - ip->i_di.di_size = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); } if (inode == sdp->sd_rindex) @@ -544,33 +605,15 @@ static int gfs2_commit_write(struct file *file, struct page *page, brelse(dibh); gfs2_trans_end(sdp); +failed: if (al->al_requested) { gfs2_inplace_release(ip); gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } - unlock_page(page); - gfs2_glock_dq_m(1, &ip->i_gh); - lock_page(page); + gfs2_glock_dq(&ip->i_gh); gfs2_holder_uninit(&ip->i_gh); - return 0; - -fail: - brelse(dibh); -fail_endtrans: - gfs2_trans_end(sdp); - if (al->al_requested) { - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - unlock_page(page); - gfs2_glock_dq_m(1, &ip->i_gh); - lock_page(page); - gfs2_holder_uninit(&ip->i_gh); -fail_nounlock: - ClearPageUptodate(page); - return error; + return ret; } /** @@ -799,8 +842,8 @@ const struct address_space_operations gfs2_file_aops = { .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, - .prepare_write = gfs2_prepare_write, - .commit_write = gfs2_commit_write, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, .set_page_dirty = gfs2_set_page_dirty, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 5ea6b3d45ea..c176f67ba0a 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -464,23 +464,20 @@ void hfs_file_truncate(struct inode *inode) (long long)HFS_I(inode)->phys_size, inode->i_size); if (inode->i_size > HFS_I(inode)->phys_size) { struct address_space *mapping = inode->i_mapping; + void *fsdata; struct page *page; int res; + /* XXX: Can use generic_cont_expand? */ size = inode->i_size - 1; - page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); - if (!page) - return; - size &= PAGE_CACHE_SIZE - 1; - size++; - res = mapping->a_ops->prepare_write(NULL, page, size, size); - if (!res) - res = mapping->a_ops->commit_write(NULL, page, size, size); + res = pagecache_write_begin(NULL, mapping, size+1, 0, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + if (!res) { + res = pagecache_write_end(NULL, mapping, size+1, 0, 0, + page, fsdata); + } if (res) inode->i_size = HFS_I(inode)->phys_size; - unlock_page(page); - page_cache_release(page); - mark_inode_dirty(inode); return; } else if (inode->i_size == HFS_I(inode)->phys_size) return; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index bc835f272a6..97f8446c4ff 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -35,10 +35,14 @@ static int hfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, hfs_get_block); } -static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int hfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, hfs_get_block, - &HFS_I(page->mapping->host)->phys_size); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + hfs_get_block, + &HFS_I(mapping->host)->phys_size); } static sector_t hfs_bmap(struct address_space *mapping, sector_t block) @@ -119,8 +123,8 @@ const struct address_space_operations hfs_btree_aops = { .readpage = hfs_readpage, .writepage = hfs_writepage, .sync_page = block_sync_page, - .prepare_write = hfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfs_write_begin, + .write_end = generic_write_end, .bmap = hfs_bmap, .releasepage = hfs_releasepage, }; @@ -129,8 +133,8 @@ const struct address_space_operations hfs_aops = { .readpage = hfs_readpage, .writepage = hfs_writepage, .sync_page = block_sync_page, - .prepare_write = hfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfs_write_begin, + .write_end = generic_write_end, .bmap = hfs_bmap, .direct_IO = hfs_direct_IO, .writepages = hfs_writepages, diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 1a7480089e8..12e899cd788 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -443,21 +443,18 @@ void hfsplus_file_truncate(struct inode *inode) if (inode->i_size > HFSPLUS_I(inode).phys_size) { struct address_space *mapping = inode->i_mapping; struct page *page; - u32 size = inode->i_size - 1; + void *fsdata; + u32 size = inode->i_size; int res; - page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); - if (!page) - return; - size &= PAGE_CACHE_SIZE - 1; - size++; - res = mapping->a_ops->prepare_write(NULL, page, size, size); - if (!res) - res = mapping->a_ops->commit_write(NULL, page, size, size); + res = pagecache_write_begin(NULL, mapping, size, 0, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); if (res) - inode->i_size = HFSPLUS_I(inode).phys_size; - unlock_page(page); - page_cache_release(page); + return; + res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); + if (res < 0) + return; mark_inode_dirty(inode); return; } else if (inode->i_size == HFSPLUS_I(inode).phys_size) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 6f7c662174d..37744cf3706 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -27,10 +27,14 @@ static int hfsplus_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, hfsplus_get_block, wbc); } -static int hfsplus_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int hfsplus_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, hfsplus_get_block, - &HFSPLUS_I(page->mapping->host).phys_size); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + hfsplus_get_block, + &HFSPLUS_I(mapping->host).phys_size); } static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) @@ -114,8 +118,8 @@ const struct address_space_operations hfsplus_btree_aops = { .readpage = hfsplus_readpage, .writepage = hfsplus_writepage, .sync_page = block_sync_page, - .prepare_write = hfsplus_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfsplus_write_begin, + .write_end = generic_write_end, .bmap = hfsplus_bmap, .releasepage = hfsplus_releasepage, }; @@ -124,8 +128,8 @@ const struct address_space_operations hfsplus_aops = { .readpage = hfsplus_readpage, .writepage = hfsplus_writepage, .sync_page = block_sync_page, - .prepare_write = hfsplus_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfsplus_write_begin, + .write_end = generic_write_end, .bmap = hfsplus_bmap, .direct_IO = hfsplus_direct_IO, .writepages = hfsplus_writepages, diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 06e5930515f..6ae9011b95e 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -3,7 +3,8 @@ #include "os.h" -/* These are exactly the same definitions as in fs.h, but the names are +/* + * These are exactly the same definitions as in fs.h, but the names are * changed so that this file can be included in both kernel and user files. */ @@ -21,7 +22,8 @@ #define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ #define HOSTFS_ATTR_ATTR_FLAG 1024 -/* If you are very careful, you'll notice that these two are missing: +/* + * If you are very careful, you'll notice that these two are missing: * * #define ATTR_KILL_SUID 2048 * #define ATTR_KILL_SGID 4096 @@ -76,7 +78,8 @@ extern int make_symlink(const char *from, const char *to); extern int unlink_file(const char *file); extern int do_mkdir(const char *file, int mode); extern int do_rmdir(const char *file); -extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor); +extern int do_mknod(const char *file, int mode, unsigned int major, + unsigned int minor); extern int link_file(const char *from, const char *to); extern int do_readlink(char *file, char *buf, int size); extern int rename_file(char *from, char *to); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index c77862032e8..8966b050196 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -6,21 +6,14 @@ * 2003-02-10 Petr Baudis <pasky@ucw.cz> */ -#include <linux/stddef.h> #include <linux/fs.h> #include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> +#include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/blkdev.h> -#include <linux/list.h> #include <linux/statfs.h> -#include <linux/kdev_t.h> -#include <asm/uaccess.h> #include "hostfs.h" -#include "kern_util.h" -#include "kern.h" #include "init.h" +#include "kern.h" struct hostfs_inode_info { char *host_filename; @@ -61,18 +54,18 @@ static int __init hostfs_args(char *options, int *add) char *ptr; ptr = strchr(options, ','); - if(ptr != NULL) + if (ptr != NULL) *ptr++ = '\0'; - if(*options != '\0') + if (*options != '\0') root_ino = options; options = ptr; - while(options){ + while (options) { ptr = strchr(options, ','); - if(ptr != NULL) + if (ptr != NULL) *ptr++ = '\0'; - if(*options != '\0'){ - if(!strcmp(options, "append")) + if (*options != '\0') { + if (!strcmp(options, "append")) append = 1; else printf("hostfs_args - unsupported option - %s\n", options); @@ -102,7 +95,7 @@ static char *dentry_name(struct dentry *dentry, int extra) len = 0; parent = dentry; - while(parent->d_parent != parent){ + while (parent->d_parent != parent) { len += parent->d_name.len + 1; parent = parent->d_parent; } @@ -110,12 +103,12 @@ static char *dentry_name(struct dentry *dentry, int extra) root = HOSTFS_I(parent->d_inode)->host_filename; len += strlen(root); name = kmalloc(len + extra + 1, GFP_KERNEL); - if(name == NULL) + if (name == NULL) return NULL; name[len] = '\0'; parent = dentry; - while(parent->d_parent != parent){ + while (parent->d_parent != parent) { len -= parent->d_name.len + 1; name[len] = '/'; strncpy(&name[len + 1], parent->d_name.name, @@ -136,7 +129,8 @@ static char *inode_name(struct inode *ino, int extra) static int read_name(struct inode *ino, char *name) { - /* The non-int inode fields are copied into ints by stat_file and + /* + * The non-int inode fields are copied into ints by stat_file and * then copied into the inode because passing the actual pointers * in and having them treated as int * breaks on big-endian machines */ @@ -149,7 +143,7 @@ static int read_name(struct inode *ino, char *name) err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, &ino->i_ctime, &i_blksize, &i_blocks, -1); - if(err) + if (err) return err; ino->i_ino = i_ino; @@ -166,33 +160,33 @@ static char *follow_link(char *link) char *name, *resolved, *end; len = 64; - while(1){ + while (1) { n = -ENOMEM; name = kmalloc(len, GFP_KERNEL); - if(name == NULL) + if (name == NULL) goto out; n = do_readlink(link, name, len); - if(n < len) + if (n < len) break; len *= 2; kfree(name); } - if(n < 0) + if (n < 0) goto out_free; - if(*name == '/') + if (*name == '/') return name; end = strrchr(link, '/'); - if(end == NULL) + if (end == NULL) return name; *(end + 1) = '\0'; len = strlen(link) + strlen(name) + 1; resolved = kmalloc(len, GFP_KERNEL); - if(resolved == NULL){ + if (resolved == NULL) { n = -ENOMEM; goto out_free; } @@ -213,20 +207,21 @@ static int read_inode(struct inode *ino) char *name; int err = 0; - /* Unfortunately, we are called from iget() when we don't have a dentry + /* + * Unfortunately, we are called from iget() when we don't have a dentry * allocated yet. */ - if(list_empty(&ino->i_dentry)) + if (list_empty(&ino->i_dentry)) goto out; err = -ENOMEM; name = inode_name(ino, 0); - if(name == NULL) + if (name == NULL) goto out; - if(file_type(name, NULL, NULL) == OS_TYPE_SYMLINK){ + if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) { name = follow_link(name); - if(IS_ERR(name)){ + if (IS_ERR(name)) { err = PTR_ERR(name); goto out; } @@ -240,7 +235,8 @@ static int read_inode(struct inode *ino) int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) { - /* do_statfs uses struct statfs64 internally, but the linux kernel + /* + * do_statfs uses struct statfs64 internally, but the linux kernel * struct statfs still has 32-bit versions for most of these fields, * so we convert them here */ @@ -255,7 +251,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), &sf->f_namelen, sf->f_spare); - if(err) + if (err) return err; sf->f_blocks = f_blocks; sf->f_bfree = f_bfree; @@ -271,7 +267,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) struct hostfs_inode_info *hi; hi = kmalloc(sizeof(*hi), GFP_KERNEL); - if(hi == NULL) + if (hi == NULL) return NULL; *hi = ((struct hostfs_inode_info) { .host_filename = NULL, @@ -284,7 +280,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) static void hostfs_delete_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - if(HOSTFS_I(inode)->fd != -1) { + if (HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); HOSTFS_I(inode)->fd = -1; } @@ -295,9 +291,11 @@ static void hostfs_destroy_inode(struct inode *inode) { kfree(HOSTFS_I(inode)->host_filename); - /*XXX: This should not happen, probably. The check is here for - * additional safety.*/ - if(HOSTFS_I(inode)->fd != -1) { + /* + * XXX: This should not happen, probably. The check is here for + * additional safety. + */ + if (HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); printk(KERN_DEBUG "Closing host fd in .destroy_inode\n"); } @@ -327,17 +325,17 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) int error, len; name = dentry_name(file->f_path.dentry, 0); - if(name == NULL) + if (name == NULL) return -ENOMEM; dir = open_dir(name, &error); kfree(name); - if(dir == NULL) + if (dir == NULL) return -error; next = file->f_pos; - while((name = read_dir(dir, &next, &ino, &len)) != NULL){ + while ((name = read_dir(dir, &next, &ino, &len)) != NULL) { error = (*filldir)(ent, name, len, file->f_pos, ino, DT_UNKNOWN); - if(error) break; + if (error) break; file->f_pos = next; } close_dir(dir); @@ -350,32 +348,33 @@ int hostfs_file_open(struct inode *ino, struct file *file) int mode = 0, r = 0, w = 0, fd; mode = file->f_mode & (FMODE_READ | FMODE_WRITE); - if((mode & HOSTFS_I(ino)->mode) == mode) + if ((mode & HOSTFS_I(ino)->mode) == mode) return 0; - /* The file may already have been opened, but with the wrong access, + /* + * The file may already have been opened, but with the wrong access, * so this resets things and reopens the file with the new access. */ - if(HOSTFS_I(ino)->fd != -1){ + if (HOSTFS_I(ino)->fd != -1) { close_file(&HOSTFS_I(ino)->fd); HOSTFS_I(ino)->fd = -1; } HOSTFS_I(ino)->mode |= mode; - if(HOSTFS_I(ino)->mode & FMODE_READ) + if (HOSTFS_I(ino)->mode & FMODE_READ) r = 1; - if(HOSTFS_I(ino)->mode & FMODE_WRITE) + if (HOSTFS_I(ino)->mode & FMODE_WRITE) w = 1; - if(w) + if (w) r = 1; name = dentry_name(file->f_path.dentry, 0); - if(name == NULL) + if (name == NULL) return -ENOMEM; fd = open_file(name, r, w, append); kfree(name); - if(fd < 0) + if (fd < 0) return fd; FILE_HOSTFS_I(file)->fd = fd; @@ -423,7 +422,7 @@ int hostfs_writepage(struct page *page, struct writeback_control *wbc) base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); - if(err != count){ + if (err != count) { ClearPageUptodate(page); goto out; } @@ -452,7 +451,8 @@ int hostfs_readpage(struct file *file, struct page *page) buffer = kmap(page); err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, PAGE_CACHE_SIZE); - if(err < 0) goto out; + if (err < 0) + goto out; memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); @@ -466,56 +466,43 @@ int hostfs_readpage(struct file *file, struct page *page) return err; } -int hostfs_prepare_write(struct file *file, struct page *page, - unsigned int from, unsigned int to) +int hostfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - char *buffer; - long long start, tmp; - int err; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; - start = (long long) page->index << PAGE_CACHE_SHIFT; - buffer = kmap(page); - if(from != 0){ - tmp = start; - err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer, - from); - if(err < 0) goto out; - } - if(to != PAGE_CACHE_SIZE){ - start += to; - err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to, - PAGE_CACHE_SIZE - to); - if(err < 0) goto out; - } - err = 0; - out: - kunmap(page); - return err; + *pagep = __grab_cache_page(mapping, index); + if (!*pagep) + return -ENOMEM; + return 0; } -int hostfs_commit_write(struct file *file, struct page *page, unsigned from, - unsigned to) +int hostfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - char *buffer; - long long start; - int err = 0; + void *buffer; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + int err; - start = (((long long) page->index) << PAGE_CACHE_SHIFT) + from; buffer = kmap(page); - err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, - to - from); - if(err > 0) err = 0; + err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied); + kunmap(page); - /* Actually, if !err, write_file has added to-from to start, so, despite - * the appearance, we are comparing i_size against the _last_ written - * location, as we should. */ + if (!PageUptodate(page) && err == PAGE_CACHE_SIZE) + SetPageUptodate(page); - if(!err && (start > inode->i_size)) - inode->i_size = start; + /* + * If err > 0, write_file has added err to pos, so we are comparing + * i_size against the last byte written. + */ + if (err > 0 && (pos > inode->i_size)) + inode->i_size = pos; + unlock_page(page); + page_cache_release(page); - kunmap(page); return err; } @@ -523,8 +510,8 @@ static const struct address_space_operations hostfs_aops = { .writepage = hostfs_writepage, .readpage = hostfs_readpage, .set_page_dirty = __set_page_dirty_nobuffers, - .prepare_write = hostfs_prepare_write, - .commit_write = hostfs_commit_write + .write_begin = hostfs_write_begin, + .write_end = hostfs_write_end, }; static int init_inode(struct inode *inode, struct dentry *dentry) @@ -534,28 +521,28 @@ static int init_inode(struct inode *inode, struct dentry *dentry) int maj, min; dev_t rdev = 0; - if(dentry){ + if (dentry) { name = dentry_name(dentry, 0); - if(name == NULL) + if (name == NULL) goto out; type = file_type(name, &maj, &min); - /*Reencode maj and min with the kernel encoding.*/ + /* Reencode maj and min with the kernel encoding.*/ rdev = MKDEV(maj, min); kfree(name); } else type = OS_TYPE_DIR; err = 0; - if(type == OS_TYPE_SYMLINK) + if (type == OS_TYPE_SYMLINK) inode->i_op = &page_symlink_inode_operations; - else if(type == OS_TYPE_DIR) + else if (type == OS_TYPE_DIR) inode->i_op = &hostfs_dir_iops; else inode->i_op = &hostfs_iops; - if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; + if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; else inode->i_fop = &hostfs_file_fops; - if(type == OS_TYPE_SYMLINK) + if (type == OS_TYPE_SYMLINK) inode->i_mapping->a_ops = &hostfs_link_aops; else inode->i_mapping->a_ops = &hostfs_aops; @@ -578,7 +565,7 @@ static int init_inode(struct inode *inode, struct dentry *dentry) } int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) + struct nameidata *nd) { struct inode *inode; char *name; @@ -586,27 +573,28 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, error = -ENOMEM; inode = iget(dir->i_sb, 0); - if(inode == NULL) goto out; + if (inode == NULL) + goto out; error = init_inode(inode, dentry); - if(error) + if (error) goto out_put; error = -ENOMEM; name = dentry_name(dentry, 0); - if(name == NULL) + if (name == NULL) goto out_put; fd = file_create(name, mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); - if(fd < 0) + if (fd < 0) error = fd; else error = read_name(inode, name); kfree(name); - if(error) + if (error) goto out_put; HOSTFS_I(inode)->fd = fd; @@ -629,25 +617,25 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, err = -ENOMEM; inode = iget(ino->i_sb, 0); - if(inode == NULL) + if (inode == NULL) goto out; err = init_inode(inode, dentry); - if(err) + if (err) goto out_put; err = -ENOMEM; name = dentry_name(dentry, 0); - if(name == NULL) + if (name == NULL) goto out_put; err = read_name(inode, name); kfree(name); - if(err == -ENOENT){ + if (err == -ENOENT) { iput(inode); inode = NULL; } - else if(err) + else if (err) goto out_put; d_add(dentry, inode); @@ -666,7 +654,7 @@ static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) int len; file = inode_name(ino, dentry->d_name.len + 1); - if(file == NULL) + if (file == NULL) return NULL; strcat(file, "/"); len = strlen(file); @@ -680,10 +668,10 @@ int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) char *from_name, *to_name; int err; - if((from_name = inode_dentry_name(ino, from)) == NULL) + if ((from_name = inode_dentry_name(ino, from)) == NULL) return -ENOMEM; to_name = dentry_name(to, 0); - if(to_name == NULL){ + if (to_name == NULL) { kfree(from_name); return -ENOMEM; } @@ -698,9 +686,9 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = inode_dentry_name(ino, dentry)) == NULL) return -ENOMEM; - if(append) + if (append) return -EPERM; err = unlink_file(file); @@ -713,7 +701,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = inode_dentry_name(ino, dentry)) == NULL) return -ENOMEM; err = make_symlink(file, to); kfree(file); @@ -725,7 +713,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = inode_dentry_name(ino, dentry)) == NULL) return -ENOMEM; err = do_mkdir(file, mode); kfree(file); @@ -737,7 +725,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; - if((file = inode_dentry_name(ino, dentry)) == NULL) + if ((file = inode_dentry_name(ino, dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); kfree(file); @@ -751,26 +739,26 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) int err = -ENOMEM; inode = iget(dir->i_sb, 0); - if(inode == NULL) + if (inode == NULL) goto out; err = init_inode(inode, dentry); - if(err) + if (err) goto out_put; err = -ENOMEM; name = dentry_name(dentry, 0); - if(name == NULL) + if (name == NULL) goto out_put; init_special_inode(inode, mode, dev); err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); - if(err) + if (err) goto out_free; err = read_name(inode, name); kfree(name); - if(err) + if (err) goto out_put; d_instantiate(dentry, inode); @@ -790,9 +778,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; - if((from_name = inode_dentry_name(from_ino, from)) == NULL) + if ((from_name = inode_dentry_name(from_ino, from)) == NULL) return -ENOMEM; - if((to_name = inode_dentry_name(to_ino, to)) == NULL){ + if ((to_name = inode_dentry_name(to_ino, to)) == NULL) { kfree(from_name); return -ENOMEM; } @@ -815,12 +803,12 @@ int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) return -ENOMEM; if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) || - S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode)) + S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode)) err = 0; else err = access_file(name, r, w, x); kfree(name); - if(!err) + if (!err) err = generic_permission(ino, desired, NULL); return err; } @@ -837,62 +825,55 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - if(append) + if (append) attr->ia_valid &= ~ATTR_SIZE; attrs.ia_valid = 0; - if(attr->ia_valid & ATTR_MODE){ + if (attr->ia_valid & ATTR_MODE) { attrs.ia_valid |= HOSTFS_ATTR_MODE; attrs.ia_mode = attr->ia_mode; } - if(attr->ia_valid & ATTR_UID){ + if (attr->ia_valid & ATTR_UID) { attrs.ia_valid |= HOSTFS_ATTR_UID; attrs.ia_uid = attr->ia_uid; } - if(attr->ia_valid & ATTR_GID){ + if (attr->ia_valid & ATTR_GID) { attrs.ia_valid |= HOSTFS_ATTR_GID; attrs.ia_gid = attr->ia_gid; } - if(attr->ia_valid & ATTR_SIZE){ + if (attr->ia_valid & ATTR_SIZE) { attrs.ia_valid |= HOSTFS_ATTR_SIZE; attrs.ia_size = attr->ia_size; } - if(attr->ia_valid & ATTR_ATIME){ + if (attr->ia_valid & ATTR_ATIME) { attrs.ia_valid |= HOSTFS_ATTR_ATIME; attrs.ia_atime = attr->ia_atime; } - if(attr->ia_valid & ATTR_MTIME){ + if (attr->ia_valid & ATTR_MTIME) { attrs.ia_valid |= HOSTFS_ATTR_MTIME; attrs.ia_mtime = attr->ia_mtime; } - if(attr->ia_valid & ATTR_CTIME){ + if (attr->ia_valid & ATTR_CTIME) { attrs.ia_valid |= HOSTFS_ATTR_CTIME; attrs.ia_ctime = attr->ia_ctime; } - if(attr->ia_valid & ATTR_ATIME_SET){ + if (attr->ia_valid & ATTR_ATIME_SET) { attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; } - if(attr->ia_valid & ATTR_MTIME_SET){ + if (attr->ia_valid & ATTR_MTIME_SET) { attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; } name = dentry_name(dentry, 0); - if(name == NULL) + if (name == NULL) return -ENOMEM; err = set_attr(name, &attrs, fd); kfree(name); - if(err) + if (err) return err; return inode_setattr(dentry->d_inode, attr); } -int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - generic_fillattr(dentry->d_inode, stat); - return 0; -} - static const struct inode_operations hostfs_iops = { .create = hostfs_create, .link = hostfs_link, @@ -904,7 +885,6 @@ static const struct inode_operations hostfs_iops = { .rename = hostfs_rename, .permission = hostfs_permission, .setattr = hostfs_setattr, - .getattr = hostfs_getattr, }; static const struct inode_operations hostfs_dir_iops = { @@ -919,7 +899,6 @@ static const struct inode_operations hostfs_dir_iops = { .rename = hostfs_rename, .permission = hostfs_permission, .setattr = hostfs_setattr, - .getattr = hostfs_getattr, }; int hostfs_link_readpage(struct file *file, struct page *page) @@ -929,13 +908,13 @@ int hostfs_link_readpage(struct file *file, struct page *page) buffer = kmap(page); name = inode_name(page->mapping->host, 0); - if(name == NULL) + if (name == NULL) return -ENOMEM; err = do_readlink(name, buffer, PAGE_CACHE_SIZE); kfree(name); - if(err == PAGE_CACHE_SIZE) + if (err == PAGE_CACHE_SIZE) err = -E2BIG; - else if(err > 0){ + else if (err > 0) { flush_dcache_page(page); SetPageUptodate(page); if (PageError(page)) ClearPageError(page); @@ -968,31 +947,33 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) err = -ENOMEM; host_root_path = kmalloc(strlen(root_ino) + 1 + strlen(req_root) + 1, GFP_KERNEL); - if(host_root_path == NULL) + if (host_root_path == NULL) goto out; sprintf(host_root_path, "%s/%s", root_ino, req_root); root_inode = iget(sb, 0); - if(root_inode == NULL) + if (root_inode == NULL) goto out_free; err = init_inode(root_inode, NULL); - if(err) + if (err) goto out_put; HOSTFS_I(root_inode)->host_filename = host_root_path; - /* Avoid that in the error path, iput(root_inode) frees again - * host_root_path through hostfs_destroy_inode! */ + /* + * Avoid that in the error path, iput(root_inode) frees again + * host_root_path through hostfs_destroy_inode! + */ host_root_path = NULL; err = -ENOMEM; sb->s_root = d_alloc_root(root_inode); - if(sb->s_root == NULL) + if (sb->s_root == NULL) goto out_put; err = read_inode(root_inode); - if(err){ + if (err) { /* No iput in this case because the dput does that for us */ dput(sb->s_root); sb->s_root = NULL; diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 5625e2481dd..35c1a9f33f4 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -3,19 +3,21 @@ * Licensed under the GPL */ -#include <unistd.h> #include <stdio.h> -#include <fcntl.h> +#include <stddef.h> +#include <unistd.h> #include <dirent.h> #include <errno.h> -#include <utime.h> +#include <fcntl.h> #include <string.h> #include <sys/stat.h> #include <sys/time.h> +#include <sys/types.h> #include <sys/vfs.h> #include "hostfs.h" -#include "kern_util.h" +#include "os.h" #include "user.h" +#include <utime.h> int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, int *nlink_out, int *uid_out, int *gid_out, @@ -25,33 +27,41 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, { struct stat64 buf; - if(fd >= 0) { + if (fd >= 0) { if (fstat64(fd, &buf) < 0) return -errno; - } else if(lstat64(path, &buf) < 0) { + } else if (lstat64(path, &buf) < 0) { return -errno; } - if(inode_out != NULL) *inode_out = buf.st_ino; - if(mode_out != NULL) *mode_out = buf.st_mode; - if(nlink_out != NULL) *nlink_out = buf.st_nlink; - if(uid_out != NULL) *uid_out = buf.st_uid; - if(gid_out != NULL) *gid_out = buf.st_gid; - if(size_out != NULL) *size_out = buf.st_size; - if(atime_out != NULL) { + if (inode_out != NULL) + *inode_out = buf.st_ino; + if (mode_out != NULL) + *mode_out = buf.st_mode; + if (nlink_out != NULL) + *nlink_out = buf.st_nlink; + if (uid_out != NULL) + *uid_out = buf.st_uid; + if (gid_out != NULL) + *gid_out = buf.st_gid; + if (size_out != NULL) + *size_out = buf.st_size; + if (atime_out != NULL) { atime_out->tv_sec = buf.st_atime; atime_out->tv_nsec = 0; } - if(mtime_out != NULL) { + if (mtime_out != NULL) { mtime_out->tv_sec = buf.st_mtime; mtime_out->tv_nsec = 0; } - if(ctime_out != NULL) { + if (ctime_out != NULL) { ctime_out->tv_sec = buf.st_ctime; ctime_out->tv_nsec = 0; } - if(blksize_out != NULL) *blksize_out = buf.st_blksize; - if(blocks_out != NULL) *blocks_out = buf.st_blocks; + if (blksize_out != NULL) + *blksize_out = buf.st_blksize; + if (blocks_out != NULL) + *blocks_out = buf.st_blocks; return 0; } @@ -59,21 +69,29 @@ int file_type(const char *path, int *maj, int *min) { struct stat64 buf; - if(lstat64(path, &buf) < 0) + if (lstat64(path, &buf) < 0) return -errno; - /*We cannot pass rdev as is because glibc and the kernel disagree - *about its definition.*/ - if(maj != NULL) + /* + * We cannot pass rdev as is because glibc and the kernel disagree + * about its definition. + */ + if (maj != NULL) *maj = major(buf.st_rdev); - if(min != NULL) + if (min != NULL) *min = minor(buf.st_rdev); - if(S_ISDIR(buf.st_mode)) return OS_TYPE_DIR; - else if(S_ISLNK(buf.st_mode)) return OS_TYPE_SYMLINK; - else if(S_ISCHR(buf.st_mode)) return OS_TYPE_CHARDEV; - else if(S_ISBLK(buf.st_mode)) return OS_TYPE_BLOCKDEV; - else if(S_ISFIFO(buf.st_mode))return OS_TYPE_FIFO; - else if(S_ISSOCK(buf.st_mode))return OS_TYPE_SOCK; + if (S_ISDIR(buf.st_mode)) + return OS_TYPE_DIR; + else if (S_ISLNK(buf.st_mode)) + return OS_TYPE_SYMLINK; + else if (S_ISCHR(buf.st_mode)) + return OS_TYPE_CHARDEV; + else if (S_ISBLK(buf.st_mode)) + return OS_TYPE_BLOCKDEV; + else if (S_ISFIFO(buf.st_mode)) + return OS_TYPE_FIFO; + else if (S_ISSOCK(buf.st_mode)) + return OS_TYPE_SOCK; else return OS_TYPE_FILE; } @@ -81,10 +99,13 @@ int access_file(char *path, int r, int w, int x) { int mode = 0; - if(r) mode = R_OK; - if(w) mode |= W_OK; - if(x) mode |= X_OK; - if(access(path, mode) != 0) + if (r) + mode = R_OK; + if (w) + mode |= W_OK; + if (x) + mode |= X_OK; + if (access(path, mode) != 0) return -errno; else return 0; } @@ -93,18 +114,18 @@ int open_file(char *path, int r, int w, int append) { int mode = 0, fd; - if(r && !w) + if (r && !w) mode = O_RDONLY; - else if(!r && w) + else if (!r && w) mode = O_WRONLY; - else if(r && w) + else if (r && w) mode = O_RDWR; else panic("Impossible mode in open_file"); - if(append) + if (append) mode |= O_APPEND; fd = open64(path, mode); - if(fd < 0) + if (fd < 0) return -errno; else return fd; } @@ -115,7 +136,7 @@ void *open_dir(char *path, int *err_out) dir = opendir(path); *err_out = errno; - if(dir == NULL) + if (dir == NULL) return NULL; return dir; } @@ -128,7 +149,7 @@ char *read_dir(void *stream, unsigned long long *pos, seekdir(dir, *pos); ent = readdir(dir); - if(ent == NULL) + if (ent == NULL) return NULL; *len_out = strlen(ent->d_name); *ino_out = ent->d_ino; @@ -141,7 +162,7 @@ int read_file(int fd, unsigned long long *offset, char *buf, int len) int n; n = pread64(fd, buf, len, *offset); - if(n < 0) + if (n < 0) return -errno; *offset += n; return n; @@ -152,7 +173,7 @@ int write_file(int fd, unsigned long long *offset, const char *buf, int len) int n; n = pwrite64(fd, buf, len, *offset); - if(n < 0) + if (n < 0) return -errno; *offset += n; return n; @@ -163,7 +184,7 @@ int lseek_file(int fd, long long offset, int whence) int ret; ret = lseek64(fd, offset, whence); - if(ret < 0) + if (ret < 0) return -errno; return 0; } @@ -207,7 +228,7 @@ int file_create(char *name, int ur, int uw, int ux, int gr, mode |= ow ? S_IWOTH : 0; mode |= ox ? S_IXOTH : 0; fd = open64(name, O_CREAT | O_RDWR, mode); - if(fd < 0) + if (fd < 0) return -errno; return fd; } @@ -230,7 +251,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) if (fd >= 0) { if (fchown(fd, attrs->ia_uid, -1)) return -errno; - } else if(chown(file, attrs->ia_uid, -1)) { + } else if (chown(file, attrs->ia_uid, -1)) { return -errno; } } @@ -251,9 +272,11 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) } } - /* Update accessed and/or modified time, in two parts: first set + /* + * Update accessed and/or modified time, in two parts: first set * times according to the changes to perform, and then call futimes() - * or utimes() to apply them. */ + * or utimes() to apply them. + */ ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); if (attrs->ia_valid & ma) { err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, @@ -283,12 +306,12 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) } } - if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; - if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ + /* Note: ctime is not handled */ + if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) { err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, &attrs->ia_atime, &attrs->ia_mtime, NULL, NULL, NULL, fd); - if(err != 0) + if (err != 0) return err; } return 0; @@ -299,7 +322,7 @@ int make_symlink(const char *from, const char *to) int err; err = symlink(to, from); - if(err) + if (err) return -errno; return 0; } @@ -309,7 +332,7 @@ int unlink_file(const char *file) int err; err = unlink(file); - if(err) + if (err) return -errno; return 0; } @@ -319,7 +342,7 @@ int do_mkdir(const char *file, int mode) int err; err = mkdir(file, mode); - if(err) + if (err) return -errno; return 0; } @@ -329,7 +352,7 @@ int do_rmdir(const char *file) int err; err = rmdir(file); - if(err) + if (err) return -errno; return 0; } @@ -339,7 +362,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) int err; err = mknod(file, mode, makedev(major, minor)); - if(err) + if (err) return -errno; return 0; } @@ -349,7 +372,7 @@ int link_file(const char *to, const char *from) int err; err = link(to, from); - if(err) + if (err) return -errno; return 0; } @@ -359,9 +382,9 @@ int do_readlink(char *file, char *buf, int size) int n; n = readlink(file, buf, size); - if(n < 0) + if (n < 0) return -errno; - if(n < size) + if (n < size) buf[n] = '\0'; return n; } @@ -371,7 +394,7 @@ int rename_file(char *from, char *to) int err; err = rename(from, to); - if(err < 0) + if (err < 0) return -errno; return 0; } @@ -386,7 +409,7 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out, int err; err = statfs64(root, &buf); - if(err < 0) + if (err < 0) return -errno; *bsize_out = buf.f_bsize; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 5b53e5c5d8d..be8be5040e0 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -86,25 +86,33 @@ static int hpfs_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page,hpfs_get_block, wbc); } + static int hpfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,hpfs_get_block); } -static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +static int hpfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page,from,to,hpfs_get_block, - &hpfs_i(page->mapping->host)->mmu_private); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + hpfs_get_block, + &hpfs_i(mapping->host)->mmu_private); } + static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,hpfs_get_block); } + const struct address_space_operations hpfs_aops = { .readpage = hpfs_readpage, .writepage = hpfs_writepage, .sync_page = block_sync_page, - .prepare_write = hpfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hpfs_write_begin, + .write_end = generic_write_end, .bmap = _hpfs_bmap }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 950c2fbb815..04598e12c48 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -179,6 +179,130 @@ full_search: } #endif +static int +hugetlbfs_read_actor(struct page *page, unsigned long offset, + char __user *buf, unsigned long count, + unsigned long size) +{ + char *kaddr; + unsigned long left, copied = 0; + int i, chunksize; + + if (size > count) + size = count; + + /* Find which 4k chunk and offset with in that chunk */ + i = offset >> PAGE_CACHE_SHIFT; + offset = offset & ~PAGE_CACHE_MASK; + + while (size) { + chunksize = PAGE_CACHE_SIZE; + if (offset) + chunksize -= offset; + if (chunksize > size) + chunksize = size; + kaddr = kmap(&page[i]); + left = __copy_to_user(buf, kaddr + offset, chunksize); + kunmap(&page[i]); + if (left) { + copied += (chunksize - left); + break; + } + offset = 0; + size -= chunksize; + buf += chunksize; + copied += chunksize; + i++; + } + return copied ? copied : -EFAULT; +} + +/* + * Support for read() - Find the page attached to f_mapping and copy out the + * data. Its *very* similar to do_generic_mapping_read(), we can't use that + * since it has PAGE_CACHE_SIZE assumptions. + */ +static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, + size_t len, loff_t *ppos) +{ + struct address_space *mapping = filp->f_mapping; + struct inode *inode = mapping->host; + unsigned long index = *ppos >> HPAGE_SHIFT; + unsigned long offset = *ppos & ~HPAGE_MASK; + unsigned long end_index; + loff_t isize; + ssize_t retval = 0; + + mutex_lock(&inode->i_mutex); + + /* validate length */ + if (len == 0) + goto out; + + isize = i_size_read(inode); + if (!isize) + goto out; + + end_index = (isize - 1) >> HPAGE_SHIFT; + for (;;) { + struct page *page; + int nr, ret; + + /* nr is the maximum number of bytes to copy from this page */ + nr = HPAGE_SIZE; + if (index >= end_index) { + if (index > end_index) + goto out; + nr = ((isize - 1) & ~HPAGE_MASK) + 1; + if (nr <= offset) { + goto out; + } + } + nr = nr - offset; + + /* Find the page */ + page = find_get_page(mapping, index); + if (unlikely(page == NULL)) { + /* + * We have a HOLE, zero out the user-buffer for the + * length of the hole or request. + */ + ret = len < nr ? len : nr; + if (clear_user(buf, ret)) + ret = -EFAULT; + } else { + /* + * We have the page, copy it to user space buffer. + */ + ret = hugetlbfs_read_actor(page, offset, buf, len, nr); + } + if (ret < 0) { + if (retval == 0) + retval = ret; + if (page) + page_cache_release(page); + goto out; + } + + offset += ret; + retval += ret; + len -= ret; + index += offset >> HPAGE_SHIFT; + offset &= ~HPAGE_MASK; + + if (page) + page_cache_release(page); + + /* short read or no more work */ + if ((ret != nr) || (len == 0)) + break; + } +out: + *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; + mutex_unlock(&inode->i_mutex); + return retval; +} + /* * Read a page. Again trivial. If it didn't already exist * in the page cache, it is zero-filled. @@ -189,15 +313,19 @@ static int hugetlbfs_readpage(struct file *file, struct page * page) return -EINVAL; } -static int hugetlbfs_prepare_write(struct file *file, - struct page *page, unsigned offset, unsigned to) +static int hugetlbfs_write_begin(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { return -EINVAL; } -static int hugetlbfs_commit_write(struct file *file, - struct page *page, unsigned offset, unsigned to) +static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { + BUG(); return -EINVAL; } @@ -318,21 +446,15 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) } } -/* - * Expanding truncates are not allowed. - */ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) { pgoff_t pgoff; struct address_space *mapping = inode->i_mapping; - if (offset > inode->i_size) - return -EINVAL; - BUG_ON(offset & ~HPAGE_MASK); pgoff = offset >> PAGE_SHIFT; - inode->i_size = offset; + i_size_write(inode, offset); spin_lock(&mapping->i_mmap_lock); if (!prio_tree_empty(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); @@ -569,8 +691,8 @@ static void hugetlbfs_destroy_inode(struct inode *inode) static const struct address_space_operations hugetlbfs_aops = { .readpage = hugetlbfs_readpage, - .prepare_write = hugetlbfs_prepare_write, - .commit_write = hugetlbfs_commit_write, + .write_begin = hugetlbfs_write_begin, + .write_end = hugetlbfs_write_end, .set_page_dirty = hugetlbfs_set_page_dirty, }; @@ -583,6 +705,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) } const struct file_operations hugetlbfs_file_operations = { + .read = hugetlbfs_read, .mmap = hugetlbfs_file_mmap, .fsync = simple_sync_file, .get_unmapped_area = hugetlb_get_unmapped_area, diff --git a/fs/inode.c b/fs/inode.c index f97de0aeb3b..21dab18b2f1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -568,16 +568,16 @@ EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct file_system_type *type = inode->i_sb->s_type; - /* - * ensure nobody is actually holding i_mutex - */ - mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - if (inode->i_mode & S_IFDIR) + if (inode->i_mode & S_IFDIR) { + struct file_system_type *type = inode->i_sb->s_type; + + /* + * ensure nobody is actually holding i_mutex + */ + mutex_destroy(&inode->i_mutex); + mutex_init(&inode->i_mutex); lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); - else - lockdep_set_class(&inode->i_mutex, &type->i_mutex_key); + } #endif /* * This is special! We do not need the spinlock diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 06ab3c10b1b..a6be78c05dc 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1710,7 +1710,7 @@ static int journal_init_journal_head_cache(void) journal_head_cache = kmem_cache_create("journal_head", sizeof(struct journal_head), 0, /* offset */ - 0, /* flags */ + SLAB_TEMPORARY, /* flags */ NULL); /* ctor */ retval = 0; if (journal_head_cache == 0) { @@ -2006,7 +2006,7 @@ static int __init journal_init_handle_cache(void) jbd_handle_cache = kmem_cache_create("journal_handle", sizeof(handle_t), 0, /* offset */ - 0, /* flags */ + SLAB_TEMPORARY, /* flags */ NULL); /* ctor */ if (jbd_handle_cache == NULL) { printk(KERN_EMERG "JBD: failed to create handle cache\n"); diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 62e13c8db13..ad2eacf570c 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -170,13 +170,15 @@ int __init journal_init_revoke_caches(void) { revoke_record_cache = kmem_cache_create("revoke_record", sizeof(struct jbd_revoke_record_s), - 0, SLAB_HWCACHE_ALIGN, NULL); + 0, + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, + NULL); if (revoke_record_cache == 0) return -ENOMEM; revoke_table_cache = kmem_cache_create("revoke_table", sizeof(struct jbd_revoke_table_s), - 0, 0, NULL); + 0, SLAB_TEMPORARY, NULL); if (revoke_table_cache == 0) { kmem_cache_destroy(revoke_record_cache); revoke_record_cache = NULL; diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index c2530197be0..023a17539dd 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -19,10 +19,12 @@ #include <linux/jffs2.h> #include "nodelist.h" -static int jffs2_commit_write (struct file *filp, struct page *pg, - unsigned start, unsigned end); -static int jffs2_prepare_write (struct file *filp, struct page *pg, - unsigned start, unsigned end); +static int jffs2_write_end(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pg, void *fsdata); +static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); static int jffs2_readpage (struct file *filp, struct page *pg); int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) @@ -65,8 +67,8 @@ const struct inode_operations jffs2_file_inode_operations = const struct address_space_operations jffs2_file_address_operations = { .readpage = jffs2_readpage, - .prepare_write =jffs2_prepare_write, - .commit_write = jffs2_commit_write + .write_begin = jffs2_write_begin, + .write_end = jffs2_write_end, }; static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) @@ -119,15 +121,23 @@ static int jffs2_readpage (struct file *filp, struct page *pg) return ret; } -static int jffs2_prepare_write (struct file *filp, struct page *pg, - unsigned start, unsigned end) +static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = pg->mapping->host; + struct page *pg; + struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - uint32_t pageofs = pg->index << PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + uint32_t pageofs = pos & (PAGE_CACHE_SIZE - 1); int ret = 0; - D1(printk(KERN_DEBUG "jffs2_prepare_write()\n")); + pg = __grab_cache_page(mapping, index); + if (!pg) + return -ENOMEM; + *pagep = pg; + + D1(printk(KERN_DEBUG "jffs2_write_begin()\n")); if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ @@ -142,7 +152,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg, ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) - return ret; + goto out_page; down(&f->sem); memset(&ri, 0, sizeof(ri)); @@ -172,7 +182,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg, ret = PTR_ERR(fn); jffs2_complete_reservation(c); up(&f->sem); - return ret; + goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); if (f->metadata) { @@ -181,65 +191,79 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg, f->metadata = NULL; } if (ret) { - D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in prepare_write, returned %d\n", ret)); + D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n", ret)); jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); up(&f->sem); - return ret; + goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; up(&f->sem); } - /* Read in the page if it wasn't already present, unless it's a whole page */ - if (!PageUptodate(pg) && (start || end < PAGE_CACHE_SIZE)) { + /* + * Read in the page if it wasn't already present. Cannot optimize away + * the whole page write case until jffs2_write_end can handle the + * case of a short-copy. + */ + if (!PageUptodate(pg)) { down(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); up(&f->sem); + if (ret) + goto out_page; } - D1(printk(KERN_DEBUG "end prepare_write(). pg->flags %lx\n", pg->flags)); + D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags)); + return ret; + +out_page: + unlock_page(pg); + page_cache_release(pg); return ret; } -static int jffs2_commit_write (struct file *filp, struct page *pg, - unsigned start, unsigned end) +static int jffs2_write_end(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pg, void *fsdata) { /* Actually commit the write from the page cache page we're looking at. * For now, we write the full page out each time. It sucks, but it's simple */ - struct inode *inode = pg->mapping->host; + struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode *ri; + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + copied; unsigned aligned_start = start & ~3; int ret = 0; uint32_t writtenlen = 0; - D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", + D1(printk(KERN_DEBUG "jffs2_write_end(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags)); + /* We need to avoid deadlock with page_cache_read() in + jffs2_garbage_collect_pass(). So the page must be + up to date to prevent page_cache_read() from trying + to re-lock it. */ + BUG_ON(!PageUptodate(pg)); + if (end == PAGE_CACHE_SIZE) { - if (!start) { - /* We need to avoid deadlock with page_cache_read() in - jffs2_garbage_collect_pass(). So we have to mark the - page up to date, to prevent page_cache_read() from - trying to re-lock it. */ - SetPageUptodate(pg); - } else { - /* When writing out the end of a page, write out the - _whole_ page. This helps to reduce the number of - nodes in files which have many short writes, like - syslog files. */ - start = aligned_start = 0; - } + /* When writing out the end of a page, write out the + _whole_ page. This helps to reduce the number of + nodes in files which have many short writes, like + syslog files. */ + start = aligned_start = 0; } ri = jffs2_alloc_raw_inode(); if (!ri) { - D1(printk(KERN_DEBUG "jffs2_commit_write(): Allocation of raw inode failed\n")); + D1(printk(KERN_DEBUG "jffs2_write_end(): Allocation of raw inode failed\n")); + unlock_page(pg); + page_cache_release(pg); return -ENOMEM; } @@ -287,11 +311,14 @@ static int jffs2_commit_write (struct file *filp, struct page *pg, /* generic_file_write has written more to the page cache than we've actually written to the medium. Mark the page !Uptodate so that it gets reread */ - D1(printk(KERN_DEBUG "jffs2_commit_write(): Not all bytes written. Marking page !uptodate\n")); + D1(printk(KERN_DEBUG "jffs2_write_end(): Not all bytes written. Marking page !uptodate\n")); SetPageError(pg); ClearPageUptodate(pg); } - D1(printk(KERN_DEBUG "jffs2_commit_write() returning %d\n",start+writtenlen==end?0:ret)); - return start+writtenlen==end?0:ret; + D1(printk(KERN_DEBUG "jffs2_write_end() returning %d\n", + writtenlen > 0 ? writtenlen : ret)); + unlock_page(pg); + page_cache_release(pg); + return writtenlen > 0 ? writtenlen : ret; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 3467dde27e5..4672013802e 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -255,7 +255,7 @@ int jfs_get_block(struct inode *ip, sector_t lblock, static int jfs_writepage(struct page *page, struct writeback_control *wbc) { - return nobh_writepage(page, jfs_get_block, wbc); + return block_write_full_page(page, jfs_get_block, wbc); } static int jfs_writepages(struct address_space *mapping, @@ -275,10 +275,12 @@ static int jfs_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); } -static int jfs_prepare_write(struct file *file, - struct page *page, unsigned from, unsigned to) +static int jfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return nobh_prepare_write(page, from, to, jfs_get_block); + return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + jfs_get_block); } static sector_t jfs_bmap(struct address_space *mapping, sector_t block) @@ -302,8 +304,8 @@ const struct address_space_operations jfs_aops = { .writepage = jfs_writepage, .writepages = jfs_writepages, .sync_page = block_sync_page, - .prepare_write = jfs_prepare_write, - .commit_write = nobh_commit_write, + .write_begin = jfs_write_begin, + .write_end = nobh_write_end, .bmap = jfs_bmap, .direct_IO = jfs_direct_IO, }; @@ -356,7 +358,7 @@ void jfs_truncate(struct inode *ip) { jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); - nobh_truncate_page(ip->i_mapping, ip->i_size); + nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); IWRITE_LOCK(ip, RDWRLOCK_NORMAL); jfs_truncate_nolock(ip, ip->i_size); diff --git a/fs/libfs.c b/fs/libfs.c index 5294de1f40c..f2b32d3a909 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -351,6 +351,26 @@ int simple_prepare_write(struct file *file, struct page *page, return 0; } +int simple_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct page *page; + pgoff_t index; + unsigned from; + + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + + *pagep = page; + + return simple_prepare_write(file, page, from, from+len); +} + int simple_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -369,6 +389,28 @@ int simple_commit_write(struct file *file, struct page *page, return 0; } +int simple_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + + /* zero the stale part of the page if we did a short copy */ + if (copied < len) { + void *kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + from + copied, 0, len - copied); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + + simple_commit_write(file, page, from, from+copied); + + unlock_page(page); + page_cache_release(page); + + return copied; +} + /* * the inodes created here are not hashed. If you use iunique to generate * unique inode values later for this filesystem, then you must take care @@ -642,6 +684,8 @@ EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(generic_read_dir); EXPORT_SYMBOL(get_sb_pseudo); +EXPORT_SYMBOL(simple_write_begin); +EXPORT_SYMBOL(simple_write_end); EXPORT_SYMBOL(simple_commit_write); EXPORT_SYMBOL(simple_dir_inode_operations); EXPORT_SYMBOL(simple_dir_operations); diff --git a/fs/minix/dir.c b/fs/minix/dir.c index e207cbe7095..f70433816a3 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -9,8 +9,10 @@ */ #include "minix.h" +#include <linux/buffer_head.h> #include <linux/highmem.h> #include <linux/smp_lock.h> +#include <linux/swap.h> typedef struct minix_dir_entry minix_dirent; typedef struct minix3_dir_entry minix3_dirent; @@ -48,11 +50,17 @@ static inline unsigned long dir_pages(struct inode *inode) return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; } -static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) +static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = (struct inode *)page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); + + if (pos+len > dir->i_size) { + i_size_write(dir, pos+len); + mark_inode_dirty(dir); + } if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -220,7 +228,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode) char *kaddr, *p; minix_dirent *de; minix3_dirent *de3; - unsigned from, to; + loff_t pos; int err; char *namx = NULL; __u32 inumber; @@ -272,9 +280,9 @@ int minix_add_link(struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - from = p - (char*)page_address(page); - to = from + sbi->s_dirsize; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = (page->index >> PAGE_CACHE_SHIFT) + p - (char*)page_address(page); + err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) goto out_unlock; memcpy (namx, name, namelen); @@ -285,7 +293,7 @@ got_it: memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2); de->inode = inode->i_ino; } - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, sbi->s_dirsize); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); out_put: @@ -302,15 +310,16 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) struct address_space *mapping = page->mapping; struct inode *inode = (struct inode*)mapping->host; char *kaddr = page_address(page); - unsigned from = (char*)de - kaddr; - unsigned to = from + minix_sb(inode->i_sb)->s_dirsize; + loff_t pos = page_offset(page) + (char*)de - kaddr; + unsigned len = minix_sb(inode->i_sb)->s_dirsize; int err; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __minix_write_begin(NULL, mapping, pos, len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err == 0) { de->inode = 0; - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, len); } else { unlock_page(page); } @@ -330,7 +339,8 @@ int minix_make_empty(struct inode *inode, struct inode *dir) if (!page) return -ENOMEM; - err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * sbi->s_dirsize); + err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) { unlock_page(page); goto fail; @@ -421,17 +431,20 @@ not_empty: void minix_set_link(struct minix_dir_entry *de, struct page *page, struct inode *inode) { - struct inode *dir = (struct inode*)page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; struct minix_sb_info *sbi = minix_sb(dir->i_sb); - unsigned from = (char *)de-(char*)page_address(page); - unsigned to = from + sbi->s_dirsize; + loff_t pos = page_offset(page) + + (char *)de-(char*)page_address(page); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + + err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err == 0) { de->inode = inode->i_ino; - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, sbi->s_dirsize); } else { unlock_page(page); } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 43668d7d668..f4f3343b180 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -346,24 +346,39 @@ static int minix_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, minix_get_block, wbc); } + static int minix_readpage(struct file *file, struct page *page) { return block_read_full_page(page,minix_get_block); } -static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +int __minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,minix_get_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + minix_get_block); } + +static int minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} + static sector_t minix_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,minix_get_block); } + static const struct address_space_operations minix_aops = { .readpage = minix_readpage, .writepage = minix_writepage, .sync_page = block_sync_page, - .prepare_write = minix_prepare_write, - .commit_write = generic_commit_write, + .write_begin = minix_write_begin, + .write_end = generic_write_end, .bmap = minix_bmap }; diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 73ef84f8fb0..ac5d3a75cb0 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -54,6 +54,9 @@ extern int minix_new_block(struct inode * inode); extern void minix_free_block(struct inode *inode, unsigned long block); extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int __minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); extern void V1_minix_truncate(struct inode *); extern void V2_minix_truncate(struct inode *); diff --git a/fs/mpage.c b/fs/mpage.c index b1c3e589050..d54f8f89722 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -379,31 +379,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, struct bio *bio = NULL; unsigned page_idx; sector_t last_block_in_bio = 0; - struct pagevec lru_pvec; struct buffer_head map_bh; unsigned long first_logical_block = 0; clear_buffer_mapped(&map_bh); - pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { bio = do_mpage_readpage(bio, page, nr_pages - page_idx, &last_block_in_bio, &map_bh, &first_logical_block, get_block); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else { - page_cache_release(page); } + page_cache_release(page); } - pagevec_lru_add(&lru_pvec); BUG_ON(!list_empty(pages)); if (bio) mpage_bio_submit(READ, bio); diff --git a/fs/namei.c b/fs/namei.c index a83160acd74..b40b8084eef 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2729,53 +2729,29 @@ int __page_symlink(struct inode *inode, const char *symname, int len, { struct address_space *mapping = inode->i_mapping; struct page *page; + void *fsdata; int err; char *kaddr; retry: - err = -ENOMEM; - page = find_or_create_page(mapping, 0, gfp_mask); - if (!page) - goto fail; - err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); - if (err == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry; - } + err = pagecache_write_begin(NULL, mapping, 0, len-1, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); if (err) - goto fail_map; + goto fail; + kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len-1); kunmap_atomic(kaddr, KM_USER0); - err = mapping->a_ops->commit_write(NULL, page, 0, len-1); - if (err == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry; - } - if (err) - goto fail_map; - /* - * Notice that we are _not_ going to block here - end of page is - * unmapped, so this will only try to map the rest of page, see - * that it is unmapped (typically even will not look into inode - - * ->i_size will be enough for everything) and zero it out. - * OTOH it's obviously correct and should make the page up-to-date. - */ - if (!PageUptodate(page)) { - err = mapping->a_ops->readpage(NULL, page); - if (err != AOP_TRUNCATED_PAGE) - wait_on_page_locked(page); - } else { - unlock_page(page); - } - page_cache_release(page); + + err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, + page, fsdata); if (err < 0) goto fail; + if (err < len-1) + goto retry; + mark_inode_dirty(inode); return 0; -fail_map: - unlock_page(page); - page_cache_release(page); fail: return err; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 08c7c7387fc..d29f90d00aa 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -306,27 +306,50 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync) } /* - * This does the "real" work of the write. The generic routine has - * allocated the page, locked it, done all the page alignment stuff - * calculations etc. Now we should just copy the data from user - * space and write it back to the real medium.. + * This does the "real" work of the write. We must allocate and lock the + * page to be sent back to the generic routine, which then copies the + * data from user space. * * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int nfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return nfs_flush_incompatible(file, page); + int ret; + pgoff_t index; + struct page *page; + index = pos >> PAGE_CACHE_SHIFT; + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + + ret = nfs_flush_incompatible(file, page); + if (ret) { + unlock_page(page); + page_cache_release(page); + } + return ret; } -static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int nfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - long status; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + int status; lock_kernel(); - status = nfs_updatepage(file, page, offset, to-offset); + status = nfs_updatepage(file, page, offset, copied); unlock_kernel(); - return status; + + unlock_page(page); + page_cache_release(page); + + return status < 0 ? status : copied; } static void nfs_invalidate_page(struct page *page, unsigned long offset) @@ -354,8 +377,8 @@ const struct address_space_operations nfs_file_aops = { .set_page_dirty = __set_page_dirty_nobuffers, .writepage = nfs_writepage, .writepages = nfs_writepages, - .prepare_write = nfs_prepare_write, - .commit_write = nfs_commit_write, + .write_begin = nfs_write_begin, + .write_end = nfs_write_end, .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, #ifdef CONFIG_NFS_DIRECTIO @@ -369,18 +392,35 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) struct file *filp = vma->vm_file; unsigned pagelen; int ret = -EINVAL; + void *fsdata; + struct address_space *mapping; + loff_t offset; lock_page(page); - if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) - goto out_unlock; + mapping = page->mapping; + if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) { + unlock_page(page); + return -EINVAL; + } pagelen = nfs_page_length(page); - if (pagelen == 0) - goto out_unlock; - ret = nfs_prepare_write(filp, page, 0, pagelen); - if (!ret) - ret = nfs_commit_write(filp, page, 0, pagelen); -out_unlock: + offset = (loff_t)page->index << PAGE_CACHE_SHIFT; unlock_page(page); + + /* + * we can use mapping after releasing the page lock, because: + * we hold mmap_sem on the fault path, which should pin the vma + * which should pin the file, which pins the dentry which should + * hold a reference on inode. + */ + + if (pagelen) { + struct page *page2 = NULL; + ret = nfs_write_begin(filp, mapping, offset, pagelen, + 0, &page2, &fsdata); + if (!ret) + ret = nfs_write_end(filp, mapping, offset, pagelen, + pagelen, page2, fsdata); + } return ret; } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cba899a3494..04b26672980 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -861,9 +861,9 @@ exp_get_fsid_key(svc_client *clp, int fsid) return exp_find_key(clp, FSID_NUM, fsidv, NULL); } -svc_export * -exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, - struct cache_req *reqp) +static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, + struct dentry *dentry, + struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -887,9 +887,9 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, /* * Find the export entry for a given dentry. */ -struct svc_export * -exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, - struct cache_req *reqp) +static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt, + struct dentry *dentry, + struct cache_req *reqp) { svc_export *exp; @@ -1214,9 +1214,8 @@ out: return err; } -struct svc_export * -exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, - struct cache_req *reqp) +static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, + u32 *fsidv, struct cache_req *reqp) { struct svc_export *exp; struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 34d10452c56..c69c1b30015 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1724,9 +1724,9 @@ out: return ret; } -int ocfs2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +static int ocfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { int ret; struct buffer_head *di_bh = NULL; @@ -1877,9 +1877,9 @@ out_write_size: return copied; } -int ocfs2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static int ocfs2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int ret; struct inode *inode = mapping->host; @@ -1896,6 +1896,8 @@ int ocfs2_write_end(struct file *file, struct address_space *mapping, const struct address_space_operations ocfs2_aops = { .readpage = ocfs2_readpage, .writepage = ocfs2_writepage, + .write_begin = ocfs2_write_begin, + .write_end = ocfs2_write_end, .bmap = ocfs2_bmap, .sync_page = block_sync_page, .direct_IO = ocfs2_direct_IO, diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 113560877db..503e49232e1 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -44,14 +44,6 @@ int walk_page_buffers( handle_t *handle, int (*fn)( handle_t *handle, struct buffer_head *bh)); -int ocfs2_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata); - -int ocfs2_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); - int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a62b14eb406..f92fe91ff26 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1881,143 +1881,13 @@ out: return ret; } -static inline void -ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes) -{ - const struct iovec *iov = *iovp; - size_t base = *basep; - - do { - int copy = min(bytes, iov->iov_len - base); - - bytes -= copy; - base += copy; - if (iov->iov_len == base) { - iov++; - base = 0; - } - } while (bytes); - *iovp = iov; - *basep = base; -} - -static struct page * ocfs2_get_write_source(char **ret_src_buf, - const struct iovec *cur_iov, - size_t iov_offset) -{ - int ret; - char *buf = cur_iov->iov_base + iov_offset; - struct page *src_page = NULL; - unsigned long off; - - off = (unsigned long)(buf) & ~PAGE_CACHE_MASK; - - if (!segment_eq(get_fs(), KERNEL_DS)) { - /* - * Pull in the user page. We want to do this outside - * of the meta data locks in order to preserve locking - * order in case of page fault. - */ - ret = get_user_pages(current, current->mm, - (unsigned long)buf & PAGE_CACHE_MASK, 1, - 0, 0, &src_page, NULL); - if (ret == 1) - *ret_src_buf = kmap(src_page) + off; - else - src_page = ERR_PTR(-EFAULT); - } else { - *ret_src_buf = buf; - } - - return src_page; -} - -static void ocfs2_put_write_source(struct page *page) -{ - if (page) { - kunmap(page); - page_cache_release(page); - } -} - -static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, - const struct iovec *iov, - unsigned long nr_segs, - size_t count, - ssize_t o_direct_written) -{ - int ret = 0; - ssize_t copied, total = 0; - size_t iov_offset = 0, bytes; - loff_t pos; - const struct iovec *cur_iov = iov; - struct page *user_page, *page; - char * uninitialized_var(buf); - char *dst; - void *fsdata; - - /* - * handle partial DIO write. Adjust cur_iov if needed. - */ - ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); - - do { - pos = *ppos; - - user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset); - if (IS_ERR(user_page)) { - ret = PTR_ERR(user_page); - goto out; - } - - /* Stay within our page boundaries */ - bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)), - (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK))); - /* Stay within the vector boundary */ - bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset); - /* Stay within count */ - bytes = min(bytes, count); - - page = NULL; - ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0, - &page, &fsdata); - if (ret) { - mlog_errno(ret); - goto out; - } - - dst = kmap_atomic(page, KM_USER0); - memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes); - kunmap_atomic(dst, KM_USER0); - flush_dcache_page(page); - ocfs2_put_write_source(user_page); - - copied = ocfs2_write_end(file, file->f_mapping, pos, bytes, - bytes, page, fsdata); - if (copied < 0) { - mlog_errno(copied); - ret = copied; - goto out; - } - - total += copied; - *ppos = pos + copied; - count -= copied; - - ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); - } while(count); - -out: - return total ? total : ret; -} - static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int ret, direct_io, appending, rw_level, have_alloc_sem = 0; - int can_do_direct, sync = 0; + int can_do_direct; ssize_t written = 0; size_t ocount; /* original count */ size_t count; /* after file limit checks */ @@ -2033,12 +1903,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (iocb->ki_left == 0) return 0; - ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - - count = ocount; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); appending = file->f_flags & O_APPEND ? 1 : 0; @@ -2082,33 +1946,23 @@ relock: rw_level = -1; direct_io = 0; - sync = 1; goto relock; } - if (!sync && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) - sync = 1; - - /* - * XXX: Is it ok to execute these checks a second time? - */ - ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); - if (ret) - goto out; - - /* - * Set pos so that sync_page_range_nolock() below understands - * where to start from. We might've moved it around via the - * calls above. The range we want to actually sync starts from - * *ppos here. - * - */ - pos = *ppos; - /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); if (direct_io) { + ret = generic_segment_checks(iov, &nr_segs, &ocount, + VERIFY_READ); + if (ret) + goto out_dio; + + ret = generic_write_checks(file, ppos, &count, + S_ISBLK(inode->i_mode)); + if (ret) + goto out_dio; + written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, ppos, count, ocount); if (written < 0) { @@ -2116,14 +1970,8 @@ relock: goto out_dio; } } else { - written = ocfs2_file_buffered_write(file, ppos, iov, nr_segs, - count, written); - if (written < 0) { - ret = written; - if (ret != -EFAULT || ret != -ENOSPC) - mlog_errno(ret); - goto out; - } + written = generic_file_aio_write_nolock(iocb, iov, nr_segs, + *ppos); } out_dio: @@ -2153,97 +2001,12 @@ out_sems: if (have_alloc_sem) up_read(&inode->i_alloc_sem); - if (written > 0 && sync) { - ssize_t err; - - err = sync_page_range_nolock(inode, file->f_mapping, pos, count); - if (err < 0) - written = err; - } - mutex_unlock(&inode->i_mutex); mlog_exit(ret); return written ? written : ret; } -static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, - struct splice_desc *sd) -{ - int ret, count; - ssize_t copied = 0; - struct file *file = sd->u.file; - unsigned int offset; - struct page *page = NULL; - void *fsdata; - char *src, *dst; - - ret = buf->ops->confirm(pipe, buf); - if (ret) - goto out; - - offset = sd->pos & ~PAGE_CACHE_MASK; - count = sd->len; - if (count + offset > PAGE_CACHE_SIZE) - count = PAGE_CACHE_SIZE - offset; - - ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, - &page, &fsdata); - if (ret) { - mlog_errno(ret); - goto out; - } - - src = buf->ops->map(pipe, buf, 1); - dst = kmap_atomic(page, KM_USER1); - memcpy(dst + offset, src + buf->offset, count); - kunmap_atomic(dst, KM_USER1); - buf->ops->unmap(pipe, buf, src); - - copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, - page, fsdata); - if (copied < 0) { - mlog_errno(copied); - ret = copied; - goto out; - } -out: - - return copied ? copied : ret; -} - -static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, - loff_t *ppos, - size_t len, - unsigned int flags) -{ - int ret, err; - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - - ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor); - if (ret > 0) { - *ppos += ret; - - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); - if (err) - ret = err; - } - } - - return ret; -} - static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -2273,8 +2036,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, goto out_unlock; } - /* ok, we're done with i_size and alloc work */ - ret = __ocfs2_file_splice_write(pipe, out, ppos, len, flags); + ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); out_unlock: ocfs2_rw_unlock(inode, 1); diff --git a/fs/proc/base.c b/fs/proc/base.c index e5d0953d4db..78fdfea1a7f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -492,7 +492,7 @@ static ssize_t proc_info_read(struct file * file, char __user * buf, count = PROC_BLOCK_SIZE; length = -ENOMEM; - if (!(page = __get_free_page(GFP_KERNEL))) + if (!(page = __get_free_page(GFP_TEMPORARY))) goto out; length = PROC_I(inode)->op.proc_read(task, (char*)page); @@ -532,7 +532,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, goto out; ret = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -602,7 +602,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, goto out; copied = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); + page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; @@ -788,7 +788,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, /* No partial writes. */ return -EINVAL; } - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) return -ENOMEM; length = -EFAULT; @@ -954,7 +954,8 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, char __user *buffer, int buflen) { struct inode * inode; - char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; + char *tmp = (char*)__get_free_page(GFP_TEMPORARY); + char *path; int len; if (!tmp) @@ -1726,7 +1727,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, goto out; length = -ENOMEM; - page = (char*)__get_free_page(GFP_USER); + page = (char*)__get_free_page(GFP_TEMPORARY); if (!page) goto out; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index b5e7155d30d..1bdb6243575 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -74,7 +74,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes, nbytes = MAX_NON_LFS - pos; dp = PDE(inode); - if (!(page = (char*) __get_free_page(GFP_KERNEL))) + if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) return -ENOMEM; while ((nbytes > 0) && !eof) { diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 0071939c009..5de7f874d95 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -229,6 +229,19 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; +extern struct seq_operations pagetypeinfo_op; +static int pagetypeinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pagetypeinfo_op); +} + +static const struct file_operations pagetypeinfo_file_ops = { + .open = pagetypeinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + extern struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { @@ -724,6 +737,7 @@ void __init proc_misc_init(void) #endif #endif create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); + create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 1bc8d873a9e..df8bd87e49b 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -433,16 +433,21 @@ static int qnx4_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page,qnx4_get_block, wbc); } + static int qnx4_readpage(struct file *file, struct page *page) { return block_read_full_page(page,qnx4_get_block); } -static int qnx4_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) + +static int qnx4_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct qnx4_inode_info *qnx4_inode = qnx4_i(page->mapping->host); - return cont_prepare_write(page, from, to, qnx4_get_block, - &qnx4_inode->mmu_private); + struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + qnx4_get_block, + &qnx4_inode->mmu_private); } static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { @@ -452,8 +457,8 @@ static const struct address_space_operations qnx4_aops = { .readpage = qnx4_readpage, .writepage = qnx4_writepage, .sync_page = block_sync_page, - .prepare_write = qnx4_prepare_write, - .commit_write = generic_commit_write, + .write_begin = qnx4_write_begin, + .write_end = generic_write_end, .bmap = qnx4_bmap }; diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 97bdc0b2f9d..b41a514b097 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -29,8 +29,8 @@ const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write, + .write_begin = simple_write_begin, + .write_end = simple_write_end, .set_page_dirty = __set_page_dirty_no_writeback, }; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 237fe8b8e81..0989bc2c2f6 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -29,8 +29,8 @@ static int ramfs_nommu_setattr(struct dentry *, struct iattr *); const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write, + .write_begin = simple_write_begin, + .write_end = simple_write_end, .set_page_dirty = __set_page_dirty_no_writeback, }; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 2070aeee2a5..a804903d31d 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -153,608 +153,6 @@ static int reiserfs_sync_file(struct file *p_s_filp, return (n_err < 0) ? -EIO : 0; } -/* I really do not want to play with memory shortage right now, so - to simplify the code, we are not going to write more than this much pages at - a time. This still should considerably improve performance compared to 4k - at a time case. This is 32 pages of 4k size. */ -#define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE - -/* Allocates blocks for a file to fulfil write request. - Maps all unmapped but prepared pages from the list. - Updates metadata with newly allocated blocknumbers as needed */ -static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */ - loff_t pos, /* Writing position */ - int num_pages, /* number of pages write going - to touch */ - int write_bytes, /* amount of bytes to write */ - struct page **prepared_pages, /* array of - prepared pages - */ - int blocks_to_allocate /* Amount of blocks we - need to allocate to - fit the data into file - */ - ) -{ - struct cpu_key key; // cpu key of item that we are going to deal with - struct item_head *ih; // pointer to item head that we are going to deal with - struct buffer_head *bh; // Buffer head that contains items that we are going to deal with - __le32 *item; // pointer to item we are going to deal with - INITIALIZE_PATH(path); // path to item, that we are going to deal with. - b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. - reiserfs_blocknr_hint_t hint; // hint structure for block allocator. - size_t res; // return value of various functions that we call. - int curr_block; // current block used to keep track of unmapped blocks. - int i; // loop counter - int itempos; // position in item - unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in - // first page - unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ - __u64 hole_size; // amount of blocks for a file hole, if it needed to be created. - int modifying_this_item = 0; // Flag for items traversal code to keep track - // of the fact that we already prepared - // current block for journal - int will_prealloc = 0; - RFALSE(!blocks_to_allocate, - "green-9004: tried to allocate zero blocks?"); - - /* only preallocate if this is a small write */ - if (REISERFS_I(inode)->i_prealloc_count || - (!(write_bytes & (inode->i_sb->s_blocksize - 1)) && - blocks_to_allocate < - REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) - will_prealloc = - REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; - - allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * - sizeof(b_blocknr_t), GFP_NOFS); - if (!allocated_blocks) - return -ENOMEM; - - /* First we compose a key to point at the writing position, we want to do - that outside of any locking region. */ - make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ ); - - /* If we came here, it means we absolutely need to open a transaction, - since we need to allocate some blocks */ - reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. - res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough - if (res) - goto error_exit; - reiserfs_update_inode_transaction(inode); - - /* Look for the in-tree position of our write, need path for block allocator */ - res = search_for_position_by_key(inode->i_sb, &key, &path); - if (res == IO_ERROR) { - res = -EIO; - goto error_exit; - } - - /* Allocate blocks */ - /* First fill in "hint" structure for block allocator */ - hint.th = th; // transaction handle. - hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine. - hint.inode = inode; // Inode is needed by block allocator too. - hint.search_start = 0; // We have no hint on where to search free blocks for block allocator. - hint.key = key.on_disk_key; // on disk key of file. - hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already. - hint.formatted_node = 0; // We are allocating blocks for unformatted node. - hint.preallocate = will_prealloc; - - /* Call block allocator to allocate blocks */ - res = - reiserfs_allocate_blocknrs(&hint, allocated_blocks, - blocks_to_allocate, blocks_to_allocate); - if (res != CARRY_ON) { - if (res == NO_DISK_SPACE) { - /* We flush the transaction in case of no space. This way some - blocks might become free */ - SB_JOURNAL(inode->i_sb)->j_must_wait = 1; - res = restart_transaction(th, inode, &path); - if (res) - goto error_exit; - - /* We might have scheduled, so search again */ - res = - search_for_position_by_key(inode->i_sb, &key, - &path); - if (res == IO_ERROR) { - res = -EIO; - goto error_exit; - } - - /* update changed info for hint structure. */ - res = - reiserfs_allocate_blocknrs(&hint, allocated_blocks, - blocks_to_allocate, - blocks_to_allocate); - if (res != CARRY_ON) { - res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC; - pathrelse(&path); - goto error_exit; - } - } else { - res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC; - pathrelse(&path); - goto error_exit; - } - } -#ifdef __BIG_ENDIAN - // Too bad, I have not found any way to convert a given region from - // cpu format to little endian format - { - int i; - for (i = 0; i < blocks_to_allocate; i++) - allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]); - } -#endif - - /* Blocks allocating well might have scheduled and tree might have changed, - let's search the tree again */ - /* find where in the tree our write should go */ - res = search_for_position_by_key(inode->i_sb, &key, &path); - if (res == IO_ERROR) { - res = -EIO; - goto error_exit_free_blocks; - } - - bh = get_last_bh(&path); // Get a bufferhead for last element in path. - ih = get_ih(&path); // Get a pointer to last item head in path. - item = get_item(&path); // Get a pointer to last item in path - - /* Let's see what we have found */ - if (res != POSITION_FOUND) { /* position not found, this means that we - might need to append file with holes - first */ - // Since we are writing past the file's end, we need to find out if - // there is a hole that needs to be inserted before our writing - // position, and how many blocks it is going to cover (we need to - // populate pointers to file blocks representing the hole with zeros) - - { - int item_offset = 1; - /* - * if ih is stat data, its offset is 0 and we don't want to - * add 1 to pos in the hole_size calculation - */ - if (is_statdata_le_ih(ih)) - item_offset = 0; - hole_size = (pos + item_offset - - (le_key_k_offset - (get_inode_item_key_version(inode), - &(ih->ih_key)) + op_bytes_number(ih, - inode-> - i_sb-> - s_blocksize))) - >> inode->i_sb->s_blocksize_bits; - } - - if (hole_size > 0) { - int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time. - /* area filled with zeroes, to supply as list of zero blocknumbers - We allocate it outside of loop just in case loop would spin for - several iterations. */ - char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. - if (!zeros) { - res = -ENOMEM; - goto error_exit_free_blocks; - } - do { - to_paste = - min_t(__u64, hole_size, - MAX_ITEM_LEN(inode->i_sb-> - s_blocksize) / - UNFM_P_SIZE); - if (is_indirect_le_ih(ih)) { - /* Ok, there is existing indirect item already. Need to append it */ - /* Calculate position past inserted item */ - make_cpu_key(&key, inode, - le_key_k_offset - (get_inode_item_key_version - (inode), - &(ih->ih_key)) + - op_bytes_number(ih, - inode-> - i_sb-> - s_blocksize), - TYPE_INDIRECT, 3); - res = - reiserfs_paste_into_item(th, &path, - &key, - inode, - (char *) - zeros, - UNFM_P_SIZE - * - to_paste); - if (res) { - kfree(zeros); - goto error_exit_free_blocks; - } - } else if (is_statdata_le_ih(ih)) { - /* No existing item, create it */ - /* item head for new item */ - struct item_head ins_ih; - - /* create a key for our new item */ - make_cpu_key(&key, inode, 1, - TYPE_INDIRECT, 3); - - /* Create new item head for our new item */ - make_le_item_head(&ins_ih, &key, - key.version, 1, - TYPE_INDIRECT, - to_paste * - UNFM_P_SIZE, - 0 /* free space */ ); - - /* Find where such item should live in the tree */ - res = - search_item(inode->i_sb, &key, - &path); - if (res != ITEM_NOT_FOUND) { - /* item should not exist, otherwise we have error */ - if (res != -ENOSPC) { - reiserfs_warning(inode-> - i_sb, - "green-9008: search_by_key (%K) returned %d", - &key, - res); - } - res = -EIO; - kfree(zeros); - goto error_exit_free_blocks; - } - res = - reiserfs_insert_item(th, &path, - &key, &ins_ih, - inode, - (char *)zeros); - } else { - reiserfs_panic(inode->i_sb, - "green-9011: Unexpected key type %K\n", - &key); - } - if (res) { - kfree(zeros); - goto error_exit_free_blocks; - } - /* Now we want to check if transaction is too full, and if it is - we restart it. This will also free the path. */ - if (journal_transaction_should_end - (th, th->t_blocks_allocated)) { - inode->i_size = cpu_key_k_offset(&key) + - (to_paste << inode->i_blkbits); - res = - restart_transaction(th, inode, - &path); - if (res) { - pathrelse(&path); - kfree(zeros); - goto error_exit; - } - } - - /* Well, need to recalculate path and stuff */ - set_cpu_key_k_offset(&key, - cpu_key_k_offset(&key) + - (to_paste << inode-> - i_blkbits)); - res = - search_for_position_by_key(inode->i_sb, - &key, &path); - if (res == IO_ERROR) { - res = -EIO; - kfree(zeros); - goto error_exit_free_blocks; - } - bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - hole_size -= to_paste; - } while (hole_size); - kfree(zeros); - } - } - // Go through existing indirect items first - // replace all zeroes with blocknumbers from list - // Note that if no corresponding item was found, by previous search, - // it means there are no existing in-tree representation for file area - // we are going to overwrite, so there is nothing to scan through for holes. - for (curr_block = 0, itempos = path.pos_in_item; - curr_block < blocks_to_allocate && res == POSITION_FOUND;) { - retry: - - if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) { - /* We run out of data in this indirect item, let's look for another - one. */ - /* First if we are already modifying current item, log it */ - if (modifying_this_item) { - journal_mark_dirty(th, inode->i_sb, bh); - modifying_this_item = 0; - } - /* Then set the key to look for a new indirect item (offset of old - item is added to old item length */ - set_cpu_key_k_offset(&key, - le_key_k_offset - (get_inode_item_key_version(inode), - &(ih->ih_key)) + - op_bytes_number(ih, - inode->i_sb-> - s_blocksize)); - /* Search ofor position of new key in the tree. */ - res = - search_for_position_by_key(inode->i_sb, &key, - &path); - if (res == IO_ERROR) { - res = -EIO; - goto error_exit_free_blocks; - } - bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - itempos = path.pos_in_item; - continue; // loop to check all kinds of conditions and so on. - } - /* Ok, we have correct position in item now, so let's see if it is - representing file hole (blocknumber is zero) and fill it if needed */ - if (!item[itempos]) { - /* Ok, a hole. Now we need to check if we already prepared this - block to be journaled */ - while (!modifying_this_item) { // loop until succeed - /* Well, this item is not journaled yet, so we must prepare - it for journal first, before we can change it */ - struct item_head tmp_ih; // We copy item head of found item, - // here to detect if fs changed under - // us while we were preparing for - // journal. - int fs_gen; // We store fs generation here to find if someone - // changes fs under our feet - - copy_item_head(&tmp_ih, ih); // Remember itemhead - fs_gen = get_generation(inode->i_sb); // remember fs generation - reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. - if (fs_changed(fs_gen, inode->i_sb) - && item_moved(&tmp_ih, &path)) { - // Sigh, fs was changed under us, we need to look for new - // location of item we are working with - - /* unmark prepaerd area as journaled and search for it's - new position */ - reiserfs_restore_prepared_buffer(inode-> - i_sb, - bh); - res = - search_for_position_by_key(inode-> - i_sb, - &key, - &path); - if (res == IO_ERROR) { - res = -EIO; - goto error_exit_free_blocks; - } - bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - itempos = path.pos_in_item; - goto retry; - } - modifying_this_item = 1; - } - item[itempos] = allocated_blocks[curr_block]; // Assign new block - curr_block++; - } - itempos++; - } - - if (modifying_this_item) { // We need to log last-accessed block, if it - // was modified, but not logged yet. - journal_mark_dirty(th, inode->i_sb, bh); - } - - if (curr_block < blocks_to_allocate) { - // Oh, well need to append to indirect item, or to create indirect item - // if there weren't any - if (is_indirect_le_ih(ih)) { - // Existing indirect item - append. First calculate key for append - // position. We do not need to recalculate path as it should - // already point to correct place. - make_cpu_key(&key, inode, - le_key_k_offset(get_inode_item_key_version - (inode), - &(ih->ih_key)) + - op_bytes_number(ih, - inode->i_sb->s_blocksize), - TYPE_INDIRECT, 3); - res = - reiserfs_paste_into_item(th, &path, &key, inode, - (char *)(allocated_blocks + - curr_block), - UNFM_P_SIZE * - (blocks_to_allocate - - curr_block)); - if (res) { - goto error_exit_free_blocks; - } - } else if (is_statdata_le_ih(ih)) { - // Last found item was statdata. That means we need to create indirect item. - struct item_head ins_ih; /* itemhead for new item */ - - /* create a key for our new item */ - make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one, - // because that's - // where first - // indirect item - // begins - /* Create new item head for our new item */ - make_le_item_head(&ins_ih, &key, key.version, 1, - TYPE_INDIRECT, - (blocks_to_allocate - - curr_block) * UNFM_P_SIZE, - 0 /* free space */ ); - /* Find where such item should live in the tree */ - res = search_item(inode->i_sb, &key, &path); - if (res != ITEM_NOT_FOUND) { - /* Well, if we have found such item already, or some error - occured, we need to warn user and return error */ - if (res != -ENOSPC) { - reiserfs_warning(inode->i_sb, - "green-9009: search_by_key (%K) " - "returned %d", &key, - res); - } - res = -EIO; - goto error_exit_free_blocks; - } - /* Insert item into the tree with the data as its body */ - res = - reiserfs_insert_item(th, &path, &key, &ins_ih, - inode, - (char *)(allocated_blocks + - curr_block)); - } else { - reiserfs_panic(inode->i_sb, - "green-9010: unexpected item type for key %K\n", - &key); - } - } - // the caller is responsible for closing the transaction - // unless we return an error, they are also responsible for logging - // the inode. - // - pathrelse(&path); - /* - * cleanup prellocation from previous writes - * if this is a partial block write - */ - if (write_bytes & (inode->i_sb->s_blocksize - 1)) - reiserfs_discard_prealloc(th, inode); - reiserfs_write_unlock(inode->i_sb); - - // go through all the pages/buffers and map the buffers to newly allocated - // blocks (so that system knows where to write these pages later). - curr_block = 0; - for (i = 0; i < num_pages; i++) { - struct page *page = prepared_pages[i]; //current page - struct buffer_head *head = page_buffers(page); // first buffer for a page - int block_start, block_end; // in-page offsets for buffers. - - if (!page_buffers(page)) - reiserfs_panic(inode->i_sb, - "green-9005: No buffers for prepared page???"); - - /* For each buffer in page */ - for (bh = head, block_start = 0; bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - if (!bh) - reiserfs_panic(inode->i_sb, - "green-9006: Allocated but absent buffer for a page?"); - block_end = block_start + inode->i_sb->s_blocksize; - if (i == 0 && block_end <= from) - /* if this buffer is before requested data to map, skip it */ - continue; - if (i == num_pages - 1 && block_start >= to) - /* If this buffer is after requested data to map, abort - processing of current page */ - break; - - if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it - map_bh(bh, inode->i_sb, - le32_to_cpu(allocated_blocks - [curr_block])); - curr_block++; - set_buffer_new(bh); - } - } - } - - RFALSE(curr_block > blocks_to_allocate, - "green-9007: Used too many blocks? weird"); - - kfree(allocated_blocks); - return 0; - -// Need to deal with transaction here. - error_exit_free_blocks: - pathrelse(&path); - // free blocks - for (i = 0; i < blocks_to_allocate; i++) - reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), - 1); - - error_exit: - if (th->t_trans_id) { - int err; - // update any changes we made to blk count - mark_inode_dirty(inode); - err = - journal_end(th, inode->i_sb, - JOURNAL_PER_BALANCE_CNT * 3 + 1 + - 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); - if (err) - res = err; - } - reiserfs_write_unlock(inode->i_sb); - kfree(allocated_blocks); - - return res; -} - -/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ -static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ - size_t num_pages /* amount of pages */ ) -{ - int i; // loop counter - - for (i = 0; i < num_pages; i++) { - struct page *page = prepared_pages[i]; - - try_to_free_buffers(page); - unlock_page(page); - page_cache_release(page); - } -} - -/* This function will copy data from userspace to specified pages within - supplied byte range */ -static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */ - int num_pages, /* Number of pages affected */ - int write_bytes, /* Amount of bytes to write */ - struct page **prepared_pages, /* pointer to - array to - prepared pages - */ - const char __user * buf /* Pointer to user-supplied - data */ - ) -{ - long page_fault = 0; // status of copy_from_user. - int i; // loop counter. - int offset; // offset in page - - for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages; - i++, offset = 0) { - size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page - struct page *page = prepared_pages[i]; // Current page we process. - - fault_in_pages_readable(buf, count); - - /* Copy data from userspace to the current page */ - kmap(page); - page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data. - /* Flush processor's dcache for this page */ - flush_dcache_page(page); - kunmap(page); - buf += count; - write_bytes -= count; - - if (page_fault) - break; // Was there a fault? abort. - } - - return page_fault ? -EFAULT : 0; -} - /* taken fs/buffer.c:__block_commit_write */ int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to) @@ -824,432 +222,6 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, return ret; } -/* Submit pages for write. This was separated from actual file copying - because we might want to allocate block numbers in-between. - This function assumes that caller will adjust file size to correct value. */ -static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */ - size_t num_pages, /* Number of pages to write */ - size_t write_bytes, /* number of bytes to write */ - struct page **prepared_pages /* list of pages */ - ) -{ - int status; // return status of block_commit_write. - int retval = 0; // Return value we are going to return. - int i; // loop counter - int offset; // Writing offset in page. - int orig_write_bytes = write_bytes; - int sd_update = 0; - - for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages; - i++, offset = 0) { - int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page - struct page *page = prepared_pages[i]; // Current page we process. - - status = - reiserfs_commit_page(inode, page, offset, offset + count); - if (status) - retval = status; // To not overcomplicate matters We are going to - // submit all the pages even if there was error. - // we only remember error status to report it on - // exit. - write_bytes -= count; - } - /* now that we've gotten all the ordered buffers marked dirty, - * we can safely update i_size and close any running transaction - */ - if (pos + orig_write_bytes > inode->i_size) { - inode->i_size = pos + orig_write_bytes; // Set new size - /* If the file have grown so much that tail packing is no - * longer possible, reset "need to pack" flag */ - if ((have_large_tails(inode->i_sb) && - inode->i_size > i_block_size(inode) * 4) || - (have_small_tails(inode->i_sb) && - inode->i_size > i_block_size(inode))) - REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; - else if ((have_large_tails(inode->i_sb) && - inode->i_size < i_block_size(inode) * 4) || - (have_small_tails(inode->i_sb) && - inode->i_size < i_block_size(inode))) - REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; - - if (th->t_trans_id) { - reiserfs_write_lock(inode->i_sb); - // this sets the proper flags for O_SYNC to trigger a commit - mark_inode_dirty(inode); - reiserfs_write_unlock(inode->i_sb); - } else { - reiserfs_write_lock(inode->i_sb); - reiserfs_update_inode_transaction(inode); - mark_inode_dirty(inode); - reiserfs_write_unlock(inode->i_sb); - } - - sd_update = 1; - } - if (th->t_trans_id) { - reiserfs_write_lock(inode->i_sb); - if (!sd_update) - mark_inode_dirty(inode); - status = journal_end(th, th->t_super, th->t_blocks_allocated); - if (status) - retval = status; - reiserfs_write_unlock(inode->i_sb); - } - th->t_trans_id = 0; - - /* - * we have to unlock the pages after updating i_size, otherwise - * we race with writepage - */ - for (i = 0; i < num_pages; i++) { - struct page *page = prepared_pages[i]; - unlock_page(page); - mark_page_accessed(page); - page_cache_release(page); - } - return retval; -} - -/* Look if passed writing region is going to touch file's tail - (if it is present). And if it is, convert the tail to unformatted node */ -static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */ - loff_t pos, /* Writing position */ - int write_bytes /* amount of bytes to write */ - ) -{ - INITIALIZE_PATH(path); // needed for search_for_position - struct cpu_key key; // Key that would represent last touched writing byte. - struct item_head *ih; // item header of found block; - int res; // Return value of various functions we call. - int cont_expand_offset; // We will put offset for generic_cont_expand here - // This can be int just because tails are created - // only for small files. - -/* this embodies a dependency on a particular tail policy */ - if (inode->i_size >= inode->i_sb->s_blocksize * 4) { - /* such a big files do not have tails, so we won't bother ourselves - to look for tails, simply return */ - return 0; - } - - reiserfs_write_lock(inode->i_sb); - /* find the item containing the last byte to be written, or if - * writing past the end of the file then the last item of the - * file (and then we check its type). */ - make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY, - 3 /*key length */ ); - res = search_for_position_by_key(inode->i_sb, &key, &path); - if (res == IO_ERROR) { - reiserfs_write_unlock(inode->i_sb); - return -EIO; - } - ih = get_ih(&path); - res = 0; - if (is_direct_le_ih(ih)) { - /* Ok, closest item is file tail (tails are stored in "direct" - * items), so we need to unpack it. */ - /* To not overcomplicate matters, we just call generic_cont_expand - which will in turn call other stuff and finally will boil down to - reiserfs_get_block() that would do necessary conversion. */ - cont_expand_offset = - le_key_k_offset(get_inode_item_key_version(inode), - &(ih->ih_key)); - pathrelse(&path); - res = generic_cont_expand(inode, cont_expand_offset); - } else - pathrelse(&path); - - reiserfs_write_unlock(inode->i_sb); - return res; -} - -/* This function locks pages starting from @pos for @inode. - @num_pages pages are locked and stored in - @prepared_pages array. Also buffers are allocated for these pages. - First and last page of the region is read if it is overwritten only - partially. If last page did not exist before write (file hole or file - append), it is zeroed, then. - Returns number of unallocated blocks that should be allocated to cover - new file data.*/ -static int reiserfs_prepare_file_region_for_write(struct inode *inode - /* Inode of the file */ , - loff_t pos, /* position in the file */ - size_t num_pages, /* number of pages to - prepare */ - size_t write_bytes, /* Amount of bytes to be - overwritten from - @pos */ - struct page **prepared_pages /* pointer to array - where to store - prepared pages */ - ) -{ - int res = 0; // Return values of different functions we call. - unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages. - int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page - int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; - /* offset of last modified byte in last - page */ - struct address_space *mapping = inode->i_mapping; // Pages are mapped here. - int i; // Simple counter - int blocks = 0; /* Return value (blocks that should be allocated) */ - struct buffer_head *bh, *head; // Current bufferhead and first bufferhead - // of a page. - unsigned block_start, block_end; // Starting and ending offsets of current - // buffer in the page. - struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if - // Page appeared to be not up - // to date. Note how we have - // at most 2 buffers, this is - // because we at most may - // partially overwrite two - // buffers for one page. One at // the beginning of write area - // and one at the end. - // Everything inthe middle gets // overwritten totally. - - struct cpu_key key; // cpu key of item that we are going to deal with - struct item_head *ih = NULL; // pointer to item head that we are going to deal with - struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with - INITIALIZE_PATH(path); // path to item, that we are going to deal with. - __le32 *item = NULL; // pointer to item we are going to deal with - int item_pos = -1; /* Position in indirect item */ - - if (num_pages < 1) { - reiserfs_warning(inode->i_sb, - "green-9001: reiserfs_prepare_file_region_for_write " - "called with zero number of pages to process"); - return -EFAULT; - } - - /* We have 2 loops for pages. In first loop we grab and lock the pages, so - that nobody would touch these until we release the pages. Then - we'd start to deal with mapping buffers to blocks. */ - for (i = 0; i < num_pages; i++) { - prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page - if (!prepared_pages[i]) { - res = -ENOMEM; - goto failed_page_grabbing; - } - if (!page_has_buffers(prepared_pages[i])) - create_empty_buffers(prepared_pages[i], - inode->i_sb->s_blocksize, 0); - } - - /* Let's count amount of blocks for a case where all the blocks - overwritten are new (we will substract already allocated blocks later) */ - if (num_pages > 2) - /* These are full-overwritten pages so we count all the blocks in - these pages are counted as needed to be allocated */ - blocks = - (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits); - - /* count blocks needed for first page (possibly partially written) */ - blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */ - - /* Now we account for last page. If last page == first page (we - overwrite only one page), we substract all the blocks past the - last writing position in a page out of already calculated number - of blocks */ - blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - - ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits); - /* Note how we do not roundup here since partial blocks still - should be allocated */ - - /* Now if all the write area lies past the file end, no point in - maping blocks, since there is none, so we just zero out remaining - parts of first and last pages in write area (if needed) */ - if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { - if (from != 0) /* First page needs to be partially zeroed */ - zero_user_page(prepared_pages[0], 0, from, KM_USER0); - - if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */ - zero_user_page(prepared_pages[num_pages-1], to, - PAGE_CACHE_SIZE - to, KM_USER0); - - /* Since all blocks are new - use already calculated value */ - return blocks; - } - - /* Well, since we write somewhere into the middle of a file, there is - possibility we are writing over some already allocated blocks, so - let's map these blocks and substract number of such blocks out of blocks - we need to allocate (calculated above) */ - /* Mask write position to start on blocksize, we do it out of the - loop for performance reasons */ - pos &= ~((loff_t) inode->i_sb->s_blocksize - 1); - /* Set cpu key to the starting position in a file (on left block boundary) */ - make_cpu_key(&key, inode, - 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), - TYPE_ANY, 3 /*key length */ ); - - reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key() - for (i = 0; i < num_pages; i++) { - - head = page_buffers(prepared_pages[i]); - /* For each buffer in the page */ - for (bh = head, block_start = 0; bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - if (!bh) - reiserfs_panic(inode->i_sb, - "green-9002: Allocated but absent buffer for a page?"); - /* Find where this buffer ends */ - block_end = block_start + inode->i_sb->s_blocksize; - if (i == 0 && block_end <= from) - /* if this buffer is before requested data to map, skip it */ - continue; - - if (i == num_pages - 1 && block_start >= to) { - /* If this buffer is after requested data to map, abort - processing of current page */ - break; - } - - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - /* This is optimisation for a case where buffer is mapped - and have blocknumber assigned. In case significant amount - of such buffers are present, we may avoid some amount - of search_by_key calls. - Probably it would be possible to move parts of this code - out of BKL, but I afraid that would overcomplicate code - without any noticeable benefit. - */ - item_pos++; - /* Update the key */ - set_cpu_key_k_offset(&key, - cpu_key_k_offset(&key) + - inode->i_sb->s_blocksize); - blocks--; // Decrease the amount of blocks that need to be - // allocated - continue; // Go to the next buffer - } - - if (!itembuf || /* if first iteration */ - item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the - current unformatted_item */ - /* Try to find next item */ - res = - search_for_position_by_key(inode->i_sb, - &key, &path); - /* Abort if no more items */ - if (res != POSITION_FOUND) { - /* make sure later loops don't use this item */ - itembuf = NULL; - item = NULL; - break; - } - - /* Update information about current indirect item */ - itembuf = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); - item_pos = path.pos_in_item; - - RFALSE(!is_indirect_le_ih(ih), - "green-9003: indirect item expected"); - } - - /* See if there is some block associated with the file - at that position, map the buffer to this block */ - if (get_block_num(item, item_pos)) { - map_bh(bh, inode->i_sb, - get_block_num(item, item_pos)); - blocks--; // Decrease the amount of blocks that need to be - // allocated - } - item_pos++; - /* Update the key */ - set_cpu_key_k_offset(&key, - cpu_key_k_offset(&key) + - inode->i_sb->s_blocksize); - } - } - pathrelse(&path); // Free the path - reiserfs_write_unlock(inode->i_sb); - - /* Now zero out unmappend buffers for the first and last pages of - write area or issue read requests if page is mapped. */ - /* First page, see if it is not uptodate */ - if (!PageUptodate(prepared_pages[0])) { - head = page_buffers(prepared_pages[0]); - - /* For each buffer in page */ - for (bh = head, block_start = 0; bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - - if (!bh) - reiserfs_panic(inode->i_sb, - "green-9002: Allocated but absent buffer for a page?"); - /* Find where this buffer ends */ - block_end = block_start + inode->i_sb->s_blocksize; - if (block_end <= from) - /* if this buffer is before requested data to map, skip it */ - continue; - if (block_start < from) { /* Aha, our partial buffer */ - if (buffer_mapped(bh)) { /* If it is mapped, we need to - issue READ request for it to - not loose data */ - ll_rw_block(READ, 1, &bh); - *wait_bh++ = bh; - } else { /* Not mapped, zero it */ - zero_user_page(prepared_pages[0], - block_start, - from - block_start, KM_USER0); - set_buffer_uptodate(bh); - } - } - } - } - - /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */ - if (!PageUptodate(prepared_pages[num_pages - 1]) || - ((pos + write_bytes) >> PAGE_CACHE_SHIFT) > - (inode->i_size >> PAGE_CACHE_SHIFT)) { - head = page_buffers(prepared_pages[num_pages - 1]); - - /* for each buffer in page */ - for (bh = head, block_start = 0; bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - - if (!bh) - reiserfs_panic(inode->i_sb, - "green-9002: Allocated but absent buffer for a page?"); - /* Find where this buffer ends */ - block_end = block_start + inode->i_sb->s_blocksize; - if (block_start >= to) - /* if this buffer is after requested data to map, skip it */ - break; - if (block_end > to) { /* Aha, our partial buffer */ - if (buffer_mapped(bh)) { /* If it is mapped, we need to - issue READ request for it to - not loose data */ - ll_rw_block(READ, 1, &bh); - *wait_bh++ = bh; - } else { /* Not mapped, zero it */ - zero_user_page(prepared_pages[num_pages-1], - to, block_end - to, KM_USER0); - set_buffer_uptodate(bh); - } - } - } - } - - /* Wait for read requests we made to happen, if necessary */ - while (wait_bh > wait) { - wait_on_buffer(*--wait_bh); - if (!buffer_uptodate(*wait_bh)) { - res = -EIO; - goto failed_read; - } - } - - return blocks; - failed_page_grabbing: - num_pages = i; - failed_read: - reiserfs_unprepare_pages(prepared_pages, num_pages); - return res; -} - /* Write @count bytes at position @ppos in a file indicated by @file from the buffer @buf. @@ -1284,14 +256,9 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t * new current position before returning. */ ) { - size_t already_written = 0; // Number of bytes already written to the file. - loff_t pos; // Current position in the file. - ssize_t res; // return value of various functions that we call. - int err = 0; struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. /* To simplify coding at this time, we store locked pages in array for now */ - struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; struct reiserfs_transaction_handle th; th.t_trans_id = 0; @@ -1311,212 +278,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t count = MAX_NON_LFS - (unsigned long)*ppos; } - if (file->f_flags & O_DIRECT) - return do_sync_write(file, buf, count, ppos); - - if (unlikely((ssize_t) count < 0)) - return -EINVAL; - - if (unlikely(!access_ok(VERIFY_READ, buf, count))) - return -EFAULT; - - mutex_lock(&inode->i_mutex); // locks the entire file for just us - - pos = *ppos; - - /* Check if we can write to specified region of file, file - is not overly big and this kind of stuff. Adjust pos and - count, if needed */ - res = generic_write_checks(file, &pos, &count, 0); - if (res) - goto out; - - if (count == 0) - goto out; - - res = remove_suid(file->f_path.dentry); - if (res) - goto out; - - file_update_time(file); - - // Ok, we are done with all the checks. - - // Now we should start real work - - /* If we are going to write past the file's packed tail or if we are going - to overwrite part of the tail, we need that tail to be converted into - unformatted node */ - res = reiserfs_check_for_tail_and_convert(inode, pos, count); - if (res) - goto out; - - while (count > 0) { - /* This is the main loop in which we running until some error occures - or until we write all of the data. */ - size_t num_pages; /* amount of pages we are going to write this iteration */ - size_t write_bytes; /* amount of bytes to write during this iteration */ - size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ - - /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */ - num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial - pages */ - ((count + - (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT); - /* convert size to amount of - pages */ - reiserfs_write_lock(inode->i_sb); - if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME - || num_pages > reiserfs_can_fit_pages(inode->i_sb)) { - /* If we were asked to write more data than we want to or if there - is not that much space, then we shorten amount of data to write - for this iteration. */ - num_pages = - min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, - reiserfs_can_fit_pages(inode->i_sb)); - /* Also we should not forget to set size in bytes accordingly */ - write_bytes = (num_pages << PAGE_CACHE_SHIFT) - - (pos & (PAGE_CACHE_SIZE - 1)); - /* If position is not on the - start of the page, we need - to substract the offset - within page */ - } else - write_bytes = count; - - /* reserve the blocks to be allocated later, so that later on - we still have the space to write the blocks to */ - reiserfs_claim_blocks_to_be_allocated(inode->i_sb, - num_pages << - (PAGE_CACHE_SHIFT - - inode->i_blkbits)); - reiserfs_write_unlock(inode->i_sb); - - if (!num_pages) { /* If we do not have enough space even for a single page... */ - if (pos > - inode->i_size + inode->i_sb->s_blocksize - - (pos & (inode->i_sb->s_blocksize - 1))) { - res = -ENOSPC; - break; // In case we are writing past the end of the last file block, break. - } - // Otherwise we are possibly overwriting the file, so - // let's set write size to be equal or less than blocksize. - // This way we get it correctly for file holes. - // But overwriting files on absolutelly full volumes would not - // be very efficient. Well, people are not supposed to fill - // 100% of disk space anyway. - write_bytes = - min_t(size_t, count, - inode->i_sb->s_blocksize - - (pos & (inode->i_sb->s_blocksize - 1))); - num_pages = 1; - // No blocks were claimed before, so do it now. - reiserfs_claim_blocks_to_be_allocated(inode->i_sb, - 1 << - (PAGE_CACHE_SHIFT - - - inode-> - i_blkbits)); - } - - /* Prepare for writing into the region, read in all the - partially overwritten pages, if needed. And lock the pages, - so that nobody else can access these until we are done. - We get number of actual blocks needed as a result. */ - res = reiserfs_prepare_file_region_for_write(inode, pos, - num_pages, - write_bytes, - prepared_pages); - if (res < 0) { - reiserfs_release_claimed_blocks(inode->i_sb, - num_pages << - (PAGE_CACHE_SHIFT - - inode->i_blkbits)); - break; - } - - blocks_to_allocate = res; - - /* First we correct our estimate of how many blocks we need */ - reiserfs_release_claimed_blocks(inode->i_sb, - (num_pages << - (PAGE_CACHE_SHIFT - - inode->i_sb-> - s_blocksize_bits)) - - blocks_to_allocate); - - if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */ - /* Fill in all the possible holes and append the file if needed */ - res = - reiserfs_allocate_blocks_for_region(&th, inode, pos, - num_pages, - write_bytes, - prepared_pages, - blocks_to_allocate); - } - - /* well, we have allocated the blocks, so it is time to free - the reservation we made earlier. */ - reiserfs_release_claimed_blocks(inode->i_sb, - blocks_to_allocate); - if (res) { - reiserfs_unprepare_pages(prepared_pages, num_pages); - break; - } - -/* NOTE that allocating blocks and filling blocks can be done in reverse order - and probably we would do that just to get rid of garbage in files after a - crash */ - - /* Copy data from user-supplied buffer to file's pages */ - res = - reiserfs_copy_from_user_to_file_region(pos, num_pages, - write_bytes, - prepared_pages, buf); - if (res) { - reiserfs_unprepare_pages(prepared_pages, num_pages); - break; - } - - /* Send the pages to disk and unlock them. */ - res = - reiserfs_submit_file_region_for_write(&th, inode, pos, - num_pages, - write_bytes, - prepared_pages); - if (res) - break; - - already_written += write_bytes; - buf += write_bytes; - *ppos = pos += write_bytes; - count -= write_bytes; - balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - } - - /* this is only true on error */ - if (th.t_trans_id) { - reiserfs_write_lock(inode->i_sb); - err = journal_end(&th, th.t_super, th.t_blocks_allocated); - reiserfs_write_unlock(inode->i_sb); - if (err) { - res = err; - goto out; - } - } - - if (likely(res >= 0) && - (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode)))) - res = generic_osync_inode(inode, file->f_mapping, - OSYNC_METADATA | OSYNC_DATA); - - mutex_unlock(&inode->i_mutex); - reiserfs_async_progress_wait(inode->i_sb); - return (already_written != 0) ? already_written : res; - - out: - mutex_unlock(&inode->i_mutex); // unlock the file on exit. - return res; + return do_sync_write(file, buf, count, ppos); } const struct file_operations reiserfs_file_operations = { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ddde489f1cb..95051d44a91 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -17,11 +17,12 @@ #include <linux/mpage.h> #include <linux/writeback.h> #include <linux/quotaops.h> +#include <linux/swap.h> -static int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to); -static int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to); +int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to); +int reiserfs_prepare_write(struct file *f, struct page *page, + unsigned from, unsigned to); void reiserfs_delete_inode(struct inode *inode) { @@ -2550,8 +2551,78 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) return reiserfs_write_full_page(page, wbc); } -static int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to) +static int reiserfs_write_begin(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode; + struct page *page; + pgoff_t index; + int ret; + int old_ref = 0; + + inode = mapping->host; + *fsdata = 0; + if (flags & AOP_FLAG_CONT_EXPAND && + (pos & (inode->i_sb->s_blocksize - 1)) == 0) { + pos ++; + *fsdata = (void *)(unsigned long)flags; + } + + index = pos >> PAGE_CACHE_SHIFT; + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + + reiserfs_wait_on_write_block(inode->i_sb); + fix_tail_page_for_writing(page); + if (reiserfs_transaction_running(inode->i_sb)) { + struct reiserfs_transaction_handle *th; + th = (struct reiserfs_transaction_handle *)current-> + journal_info; + BUG_ON(!th->t_refcount); + BUG_ON(!th->t_trans_id); + old_ref = th->t_refcount; + th->t_refcount++; + } + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + reiserfs_get_block); + if (ret && reiserfs_transaction_running(inode->i_sb)) { + struct reiserfs_transaction_handle *th = current->journal_info; + /* this gets a little ugly. If reiserfs_get_block returned an + * error and left a transacstion running, we've got to close it, + * and we've got to free handle if it was a persistent transaction. + * + * But, if we had nested into an existing transaction, we need + * to just drop the ref count on the handle. + * + * If old_ref == 0, the transaction is from reiserfs_get_block, + * and it was a persistent trans. Otherwise, it was nested above. + */ + if (th->t_refcount > old_ref) { + if (old_ref) + th->t_refcount--; + else { + int err; + reiserfs_write_lock(inode->i_sb); + err = reiserfs_end_persistent_transaction(th); + reiserfs_write_unlock(inode->i_sb); + if (err) + ret = err; + } + } + } + if (ret) { + unlock_page(page); + page_cache_release(page); + } + return ret; +} + +int reiserfs_prepare_write(struct file *f, struct page *page, + unsigned from, unsigned to) { struct inode *inode = page->mapping->host; int ret; @@ -2604,8 +2675,102 @@ static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) return generic_block_bmap(as, block, reiserfs_bmap); } -static int reiserfs_commit_write(struct file *f, struct page *page, - unsigned from, unsigned to) +static int reiserfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + int ret = 0; + int update_sd = 0; + struct reiserfs_transaction_handle *th; + unsigned start; + + if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) + pos ++; + + reiserfs_wait_on_write_block(inode->i_sb); + if (reiserfs_transaction_running(inode->i_sb)) + th = current->journal_info; + else + th = NULL; + + start = pos & (PAGE_CACHE_SIZE - 1); + if (unlikely(copied < len)) { + if (!PageUptodate(page)) + copied = 0; + + page_zero_new_buffers(page, start + copied, start + len); + } + flush_dcache_page(page); + + reiserfs_commit_page(inode, page, start, start + copied); + + /* generic_commit_write does this for us, but does not update the + ** transaction tracking stuff when the size changes. So, we have + ** to do the i_size updates here. + */ + pos += copied; + if (pos > inode->i_size) { + struct reiserfs_transaction_handle myth; + reiserfs_write_lock(inode->i_sb); + /* If the file have grown beyond the border where it + can have a tail, unmark it as needing a tail + packing */ + if ((have_large_tails(inode->i_sb) + && inode->i_size > i_block_size(inode) * 4) + || (have_small_tails(inode->i_sb) + && inode->i_size > i_block_size(inode))) + REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; + + ret = journal_begin(&myth, inode->i_sb, 1); + if (ret) { + reiserfs_write_unlock(inode->i_sb); + goto journal_error; + } + reiserfs_update_inode_transaction(inode); + inode->i_size = pos; + /* + * this will just nest into our transaction. It's important + * to use mark_inode_dirty so the inode gets pushed around on the + * dirty lists, and so that O_SYNC works as expected + */ + mark_inode_dirty(inode); + reiserfs_update_sd(&myth, inode); + update_sd = 1; + ret = journal_end(&myth, inode->i_sb, 1); + reiserfs_write_unlock(inode->i_sb); + if (ret) + goto journal_error; + } + if (th) { + reiserfs_write_lock(inode->i_sb); + if (!update_sd) + mark_inode_dirty(inode); + ret = reiserfs_end_persistent_transaction(th); + reiserfs_write_unlock(inode->i_sb); + if (ret) + goto out; + } + + out: + unlock_page(page); + page_cache_release(page); + return ret == 0 ? copied : ret; + + journal_error: + if (th) { + reiserfs_write_lock(inode->i_sb); + if (!update_sd) + reiserfs_update_sd(th, inode); + ret = reiserfs_end_persistent_transaction(th); + reiserfs_write_unlock(inode->i_sb); + } + + goto out; +} + +int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to) { struct inode *inode = page->mapping->host; loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; @@ -2909,7 +3074,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) } /* fill in hole pointers in the expanding truncate case. */ if (attr->ia_size > inode->i_size) { - error = generic_cont_expand(inode, attr->ia_size); + error = generic_cont_expand_simple(inode, attr->ia_size); if (REISERFS_I(inode)->i_prealloc_count > 0) { int err; struct reiserfs_transaction_handle th; @@ -2999,8 +3164,8 @@ const struct address_space_operations reiserfs_address_space_operations = { .releasepage = reiserfs_releasepage, .invalidatepage = reiserfs_invalidatepage, .sync_page = block_sync_page, - .prepare_write = reiserfs_prepare_write, - .commit_write = reiserfs_commit_write, + .write_begin = reiserfs_write_begin, + .write_end = reiserfs_write_end, .bmap = reiserfs_aop_bmap, .direct_IO = reiserfs_direct_IO, .set_page_dirty = reiserfs_set_page_dirty, diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 11a0fcc2d40..c438a8f83f2 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -128,6 +128,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, } #endif +int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to); +int reiserfs_prepare_write(struct file *f, struct page *page, + unsigned from, unsigned to); /* ** reiserfs_unpack ** Function try to convert tail from direct item into indirect. @@ -175,15 +179,13 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) if (!page) { goto out; } - retval = - mapping->a_ops->prepare_write(NULL, page, write_from, write_from); + retval = reiserfs_prepare_write(NULL, page, write_from, write_from); if (retval) goto out_unlock; /* conversion can change page contents, must flush */ flush_dcache_page(page); - retval = - mapping->a_ops->commit_write(NULL, page, write_from, write_from); + retval = reiserfs_commit_write(NULL, page, write_from, write_from); REISERFS_I(inode)->i_flags |= i_nopack_mask; out_unlock: diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index bf6e5821453..fab4b9b2664 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -426,6 +426,12 @@ static inline __u32 xattr_hash(const char *msg, int len) return csum_partial(msg, len, 0); } +int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to); +int reiserfs_prepare_write(struct file *f, struct page *page, + unsigned from, unsigned to); + + /* Generic extended attribute operations that can be used by xa plugins */ /* @@ -512,15 +518,15 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, rxh->h_hash = cpu_to_le32(xahash); } - err = mapping->a_ops->prepare_write(fp, page, page_offset, - page_offset + chunk + skip); + err = reiserfs_prepare_write(fp, page, page_offset, + page_offset + chunk + skip); if (!err) { if (buffer) memcpy(data + skip, buffer + buffer_pos, chunk); err = - mapping->a_ops->commit_write(fp, page, page_offset, - page_offset + chunk + - skip); + reiserfs_commit_write(fp, page, page_offset, + page_offset + chunk + + skip); } unlock_page(page); reiserfs_put_page(page); diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index c5d78a7e492..f5d14cebc75 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -292,29 +292,45 @@ out: * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int smb_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int smb_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + *pagep = __grab_cache_page(mapping, index); + if (!*pagep) + return -ENOMEM; return 0; } -static int smb_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int smb_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int status; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - status = -EFAULT; lock_kernel(); - status = smb_updatepage(file, page, offset, to-offset); + status = smb_updatepage(file, page, offset, copied); unlock_kernel(); + + if (!status) { + if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) + SetPageUptodate(page); + status = copied; + } + + unlock_page(page); + page_cache_release(page); + return status; } const struct address_space_operations smb_file_aops = { .readpage = smb_readpage, .writepage = smb_writepage, - .prepare_write = smb_prepare_write, - .commit_write = smb_commit_write + .write_begin = smb_write_begin, + .write_end = smb_write_end, }; /* diff --git a/fs/splice.c b/fs/splice.c index e95a3622886..59a941d404d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -447,7 +447,7 @@ fill_it: */ while (page_nr < nr_pages) page_cache_release(pages[page_nr++]); - in->f_ra.prev_index = index; + in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; if (spd.nr_pages) return splice_to_pipe(pipe, &spd); @@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct address_space *mapping = file->f_mapping; unsigned int offset, this_len; struct page *page; - pgoff_t index; + void *fsdata; int ret; /* @@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (unlikely(ret)) return ret; - index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; this_len = sd->len; if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; -find_page: - page = find_lock_page(mapping, index); - if (!page) { - ret = -ENOMEM; - page = page_cache_alloc_cold(mapping); - if (unlikely(!page)) - goto out_ret; - - /* - * This will also lock the page - */ - ret = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); - if (unlikely(ret)) - goto out_release; - } - - ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); - if (unlikely(ret)) { - loff_t isize = i_size_read(mapping->host); - - if (ret != AOP_TRUNCATED_PAGE) - unlock_page(page); - page_cache_release(page); - if (ret == AOP_TRUNCATED_PAGE) - goto find_page; - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. - */ - if (sd->pos + this_len > isize) - vmtruncate(mapping->host, isize); - - goto out_ret; - } + ret = pagecache_write_begin(file, mapping, sd->pos, this_len, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + if (unlikely(ret)) + goto out; if (buf->page != page) { /* @@ -629,31 +596,9 @@ find_page: kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); } - - ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (ret) { - if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto find_page; - } - if (ret < 0) - goto out; - /* - * Partial write has happened, so 'ret' already initialized by - * number of bytes written, Where is nothing we have to do here. - */ - } else - ret = this_len; - /* - * Return the number of bytes written and mark page as - * accessed, we are now done! - */ - mark_page_accessed(page); + ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, + page, fsdata); out: - unlock_page(page); -out_release: - page_cache_release(page); -out_ret: return ret; } @@ -1390,10 +1335,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) ret = -EFAULT; + buf->ops->unmap(pipe, buf, src); out: if (ret > 0) sd->u.userptr += ret; - buf->ops->unmap(pipe, buf, src); return ret; } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 9236635111f..c4ef945d39c 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -24,8 +24,8 @@ extern struct super_block * sysfs_sb; static const struct address_space_operations sysfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write + .write_begin = simple_write_begin, + .write_end = simple_write_end, }; static struct backing_dev_info sysfs_backing_dev_info = { diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index e566b387fcf..56f655254bf 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -16,6 +16,7 @@ #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/smp_lock.h> +#include <linux/swap.h> #include "sysv.h" static int sysv_readdir(struct file *, void *, filldir_t); @@ -37,12 +38,17 @@ static inline unsigned long dir_pages(struct inode *inode) return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; } -static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) +static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = (struct inode *)page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); + if (pos+len > dir->i_size) { + i_size_write(dir, pos+len); + mark_inode_dirty(dir); + } if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -186,7 +192,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode) unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; /* We take care of directory expansion in the same loop */ @@ -212,16 +218,17 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + SYSV_DIRSIZE; + pos = page_offset(page) + + (char*)de - (char*)page_address(page); lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) goto out_unlock; memcpy (de->name, name, namelen); memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); out_page: @@ -238,15 +245,15 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) struct address_space *mapping = page->mapping; struct inode *inode = (struct inode*)mapping->host; char *kaddr = (char*)page_address(page); - unsigned from = (char*)de - kaddr; - unsigned to = from + SYSV_DIRSIZE; + loff_t pos = page_offset(page) + (char *)de - kaddr; int err; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); de->inode = 0; - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); dir_put_page(page); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(inode); @@ -263,12 +270,13 @@ int sysv_make_empty(struct inode *inode, struct inode *dir) if (!page) return -ENOMEM; - kmap(page); - err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * SYSV_DIRSIZE); + err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) { unlock_page(page); goto fail; } + kmap(page); base = (char*)page_address(page); memset(base, 0, PAGE_CACHE_SIZE); @@ -280,9 +288,9 @@ int sysv_make_empty(struct inode *inode, struct inode *dir) de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino); strcpy(de->name,".."); + kunmap(page); err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE); fail: - kunmap(page); page_cache_release(page); return err; } @@ -336,16 +344,18 @@ not_empty: void sysv_set_link(struct sysv_dir_entry *de, struct page *page, struct inode *inode) { - struct inode *dir = (struct inode*)page->mapping->host; - unsigned from = (char *)de-(char*)page_address(page); - unsigned to = from + SYSV_DIRSIZE; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; + loff_t pos = page_offset(page) + + (char *)de-(char*)page_address(page); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); dir_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index f2bcccd1d6f..f042eec464c 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -453,23 +453,38 @@ static int sysv_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page,get_block,wbc); } + static int sysv_readpage(struct file *file, struct page *page) { return block_read_full_page(page,get_block); } -static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +int __sysv_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,get_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + get_block); } + +static int sysv_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} + static sector_t sysv_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,get_block); } + const struct address_space_operations sysv_aops = { .readpage = sysv_readpage, .writepage = sysv_writepage, .sync_page = block_sync_page, - .prepare_write = sysv_prepare_write, - .commit_write = generic_commit_write, + .write_begin = sysv_write_begin, + .write_end = generic_write_end, .bmap = sysv_bmap }; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 5b4fedf17cc..64c03bdf06a 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -136,6 +136,9 @@ extern unsigned long sysv_count_free_blocks(struct super_block *); /* itree.c */ extern void sysv_truncate(struct inode *); +extern int __sysv_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); /* inode.c */ extern int sysv_write_inode(struct inode *, int); diff --git a/fs/udf/file.c b/fs/udf/file.c index 5d7a4ea2775..7c7a1b39d56 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -76,36 +76,29 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb return 0; } -static int udf_adinicb_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int udf_adinicb_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - kmap(page); - return 0; -} - -static int udf_adinicb_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ - struct inode *inode = page->mapping->host; - char *kaddr = page_address(page); + struct inode *inode = mapping->host; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + char *kaddr; + kaddr = kmap_atomic(page, KM_USER0); memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, - kaddr + offset, to - offset); - mark_inode_dirty(inode); - SetPageUptodate(page); - kunmap(page); - /* only one page here */ - if (to > inode->i_size) - inode->i_size = to; - return 0; + kaddr + offset, copied); + kunmap_atomic(kaddr, KM_USER0); + + return simple_write_end(file, mapping, pos, len, copied, page, fsdata); } const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .sync_page = block_sync_page, - .prepare_write = udf_adinicb_prepare_write, - .commit_write = udf_adinicb_commit_write, + .write_begin = simple_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1652b2c665b..6ff8151984c 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -133,10 +133,13 @@ static int udf_readpage(struct file *file, struct page *page) return block_read_full_page(page, udf_get_block); } -static int udf_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int udf_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, udf_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + udf_get_block); } static sector_t udf_bmap(struct address_space *mapping, sector_t block) @@ -148,8 +151,8 @@ const struct address_space_operations udf_aops = { .readpage = udf_readpage, .writepage = udf_writepage, .sync_page = block_sync_page, - .prepare_write = udf_prepare_write, - .commit_write = generic_commit_write, + .write_begin = udf_write_begin, + .write_end = generic_write_end, .bmap = udf_bmap, }; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 154452172f4..2410ec6002d 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -19,6 +19,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/ufs_fs.h> +#include <linux/swap.h> #include "swab.h" #include "util.h" @@ -38,12 +39,18 @@ static inline int ufs_match(struct super_block *sb, int len, return !memcmp(name, de->d_name, len); } -static int ufs_commit_chunk(struct page *page, unsigned from, unsigned to) +static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; + dir->i_version++; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); + if (pos+len > dir->i_size) { + i_size_write(dir, pos+len); + mark_inode_dirty(dir); + } if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -81,16 +88,20 @@ ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry) void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, struct page *page, struct inode *inode) { - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + fs16_to_cpu(dir->i_sb, de->d_reclen); + loff_t pos = page_offset(page) + + (char *) de - (char *) page_address(page); + unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ufs_write_begin(NULL, page->mapping, pos, len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); + de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); ufs_set_de_type(dir->i_sb, de, inode->i_mode); - err = ufs_commit_chunk(page, from, to); + + err = ufs_commit_chunk(page, pos, len); ufs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); @@ -312,7 +323,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) unsigned long npages = ufs_dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; UFSD("ENTER, name %s, namelen %u\n", name, namelen); @@ -367,9 +378,10 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = page_offset(page) + + (char*)de - (char*)page_address(page); + err = __ufs_write_begin(NULL, page->mapping, pos, rec_len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) goto out_unlock; if (de->d_ino) { @@ -386,7 +398,7 @@ got_it: de->d_ino = cpu_to_fs32(sb, inode->i_ino); ufs_set_de_type(sb, de, inode->i_mode); - err = ufs_commit_chunk(page, from, to); + err = ufs_commit_chunk(page, pos, rec_len); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); @@ -509,6 +521,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); + loff_t pos; struct ufs_dir_entry *pde = NULL; struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from); int err; @@ -532,13 +545,16 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, } if (pde) from = (char*)pde - (char*)page_address(page); + + pos = page_offset(page) + from; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ufs_write_begin(NULL, mapping, pos, to - from, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); if (pde) - pde->d_reclen = cpu_to_fs16(sb, to-from); + pde->d_reclen = cpu_to_fs16(sb, to - from); dir->d_ino = 0; - err = ufs_commit_chunk(page, from, to); + err = ufs_commit_chunk(page, pos, to - from); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(inode); out: @@ -559,14 +575,15 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) if (!page) return -ENOMEM; - kmap(page); - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + + err = __ufs_write_begin(NULL, mapping, 0, chunk_size, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) { unlock_page(page); goto fail; } - + kmap(page); base = (char*)page_address(page); memset(base, 0, PAGE_CACHE_SIZE); @@ -584,10 +601,10 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1)); ufs_set_de_namlen(sb, de, 2); strcpy (de->d_name, ".."); + kunmap(page); err = ufs_commit_chunk(page, 0, chunk_size); fail: - kunmap(page); page_cache_release(page); return err; } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index f18b79122fa..d84d4b0f477 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -558,24 +558,39 @@ static int ufs_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page,ufs_getfrag_block,wbc); } + static int ufs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,ufs_getfrag_block); } -static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +int __ufs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,ufs_getfrag_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ufs_getfrag_block); } + +static int ufs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} + static sector_t ufs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ufs_getfrag_block); } + const struct address_space_operations ufs_aops = { .readpage = ufs_readpage, .writepage = ufs_writepage, .sync_page = block_sync_page, - .prepare_write = ufs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = ufs_write_begin, + .write_end = generic_write_end, .bmap = ufs_bmap }; diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 06d344839c4..79a340a1909 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -231,6 +231,9 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value) extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); +extern int __ufs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); /* * These functions manipulate ufs buffers diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 6f4c29e9c3d..354d68a32d4 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1508,13 +1508,18 @@ xfs_vm_direct_IO( } STATIC int -xfs_vm_prepare_write( +xfs_vm_write_begin( struct file *file, - struct page *page, - unsigned int from, - unsigned int to) + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned flags, + struct page **pagep, + void **fsdata) { - return block_prepare_write(page, from, to, xfs_get_blocks); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + xfs_get_blocks); } STATIC sector_t @@ -1568,8 +1573,8 @@ const struct address_space_operations xfs_address_space_operations = { .sync_page = block_sync_page, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, - .prepare_write = xfs_vm_prepare_write, - .commit_write = generic_commit_write, + .write_begin = xfs_vm_write_begin, + .write_end = generic_write_end, .bmap = xfs_vm_bmap, .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 765ec16a6e3..7e7aeb4c8a0 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -134,45 +134,34 @@ xfs_iozero( loff_t pos, /* offset in file */ size_t count) /* size of data to zero */ { - unsigned bytes; struct page *page; struct address_space *mapping; int status; mapping = ip->i_mapping; do { - unsigned long index, offset; + unsigned offset, bytes; + void *fsdata; offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; bytes = PAGE_CACHE_SIZE - offset; if (bytes > count) bytes = count; - status = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - break; - - status = mapping->a_ops->prepare_write(NULL, page, offset, - offset + bytes); + status = pagecache_write_begin(NULL, mapping, pos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); if (status) - goto unlock; + break; zero_user_page(page, offset, bytes, KM_USER0); - status = mapping->a_ops->commit_write(NULL, page, offset, - offset + bytes); - if (!status) { - pos += bytes; - count -= bytes; - } - -unlock: - unlock_page(page); - page_cache_release(page); - if (status) - break; + status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, + page, fsdata); + WARN_ON(status <= 0); /* can't return less than zero! */ + pos += bytes; + count -= bytes; + status = 0; } while (count); return (-status); |