diff options
Diffstat (limited to 'fs')
48 files changed, 873 insertions, 695 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c index 7a1d942ef68..0149dab365e 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -102,6 +102,7 @@ int afs_release(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_AFS_FSCACHE /* * deal with notification that a page was read from the cache */ @@ -117,6 +118,7 @@ static void afs_file_readpage_read_complete(struct page *page, SetPageUptodate(page); unlock_page(page); } +#endif /* * AFS read page from file, directory or symlink diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c index 49f18942306..7ad36506c25 100644 --- a/fs/afs/netdevices.c +++ b/fs/afs/netdevices.c @@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen) struct net_device *dev; int ret = -ENODEV; - if (maclen != ETH_ALEN) - BUG(); + BUG_ON(maclen != ETH_ALEN); rtnl_lock(); dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); diff --git a/fs/befs/super.c b/fs/befs/super.c index 41f2b4d0093..ca40f828f64 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c @@ -8,6 +8,7 @@ */ #include <linux/fs.h> +#include <asm/page.h> /* for PAGE_SIZE */ #include "befs.h" #include "super.h" @@ -348,6 +348,24 @@ err: return NULL; } +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * @nr_iovecs: number of iovecs to pre-allocate + * + * Description: + * bio_alloc will allocate a bio and associated bio_vec array that can hold + * at least @nr_iovecs entries. Allocations will be done from the + * fs_bio_set. Also see @bio_alloc_bioset. + * + * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate + * a bio. This is due to the mempool guarantees. To make this work, callers + * must never allocate more than 1 bio at the time from this pool. Callers + * that need to allocate more than 1 bio must always submit the previously + * allocate bio for IO before attempting to allocate a new one. Failure to + * do so can cause livelocks under memory pressure. + * + **/ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); diff --git a/fs/buffer.c b/fs/buffer.c index 6e35762b616..b3e5be7514f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -360,7 +360,7 @@ still_busy: * Completion handler for block_write_full_page() - pages which are unlocked * during I/O, and which have PageWriteback cleared upon I/O completion. */ -static void end_buffer_async_write(struct buffer_head *bh, int uptodate) +void end_buffer_async_write(struct buffer_head *bh, int uptodate) { char b[BDEVNAME_SIZE]; unsigned long flags; @@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh) set_buffer_async_read(bh); } -void mark_buffer_async_write(struct buffer_head *bh) +void mark_buffer_async_write_endio(struct buffer_head *bh, + bh_end_io_t *handler) { - bh->b_end_io = end_buffer_async_write; + bh->b_end_io = handler; set_buffer_async_write(bh); } + +void mark_buffer_async_write(struct buffer_head *bh) +{ + mark_buffer_async_write_endio(bh, end_buffer_async_write); +} EXPORT_SYMBOL(mark_buffer_async_write); @@ -547,7 +553,7 @@ repeat: return err; } -void do_thaw_all(unsigned long unused) +void do_thaw_all(struct work_struct *work) { struct super_block *sb; char b[BDEVNAME_SIZE]; @@ -567,6 +573,7 @@ restart: goto restart; } spin_unlock(&sb_lock); + kfree(work); printk(KERN_WARNING "Emergency Thaw complete\n"); } @@ -577,7 +584,13 @@ restart: */ void emergency_thaw_all(void) { - pdflush_operation(do_thaw_all, 0); + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_thaw_all); + schedule_work(work); + } } /** @@ -1596,9 +1609,20 @@ EXPORT_SYMBOL(unmap_underlying_metadata); * locked buffer. This only can happen if someone has written the buffer * directly, with submit_bh(). At the address_space level PageWriteback * prevents this contention from occurring. + * + * If block_write_full_page() is called with wbc->sync_mode == + * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this + * causes the writes to be flagged as synchronous writes, but the + * block device queue will NOT be unplugged, since usually many pages + * will be pushed to the out before the higher-level caller actually + * waits for the writes to be completed. The various wait functions, + * such as wait_on_writeback_range() will ultimately call sync_page() + * which will ultimately call blk_run_backing_dev(), which will end up + * unplugging the device queue. */ static int __block_write_full_page(struct inode *inode, struct page *page, - get_block_t *get_block, struct writeback_control *wbc) + get_block_t *get_block, struct writeback_control *wbc, + bh_end_io_t *handler) { int err; sector_t block; @@ -1606,7 +1630,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; - int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = (wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC_PLUG : WRITE); BUG_ON(!PageLocked(page)); @@ -1682,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, continue; } if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); + mark_buffer_async_write_endio(bh, handler); } else { unlock_buffer(bh); } @@ -1735,7 +1760,7 @@ recover: if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) { lock_buffer(bh); - mark_buffer_async_write(bh); + mark_buffer_async_write_endio(bh, handler); } else { /* * The buffer may have been set dirty during @@ -2661,7 +2686,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block, out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) - ret = __block_write_full_page(inode, page, get_block, wbc); + ret = __block_write_full_page(inode, page, get_block, wbc, + end_buffer_async_write); return ret; } EXPORT_SYMBOL(nobh_writepage); @@ -2819,9 +2845,10 @@ out: /* * The generic ->writepage function for buffer-backed address_spaces + * this form passes in the end_io handler used to finish the IO. */ -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +int block_write_full_page_endio(struct page *page, get_block_t *get_block, + struct writeback_control *wbc, bh_end_io_t *handler) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); @@ -2830,7 +2857,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully inside i_size? */ if (page->index < end_index) - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, get_block, wbc, + handler); /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_CACHE_SIZE-1); @@ -2853,9 +2881,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * writes to that region are not written out to the file." */ zero_user_segment(page, offset, PAGE_CACHE_SIZE); - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, get_block, wbc, handler); +} + +/* + * The generic ->writepage function for buffer-backed address_spaces + */ +int block_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) +{ + return block_write_full_page_endio(page, get_block, wbc, + end_buffer_async_write); } + sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) { @@ -3324,9 +3363,11 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); +EXPORT_SYMBOL(block_write_full_page_endio); EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); +EXPORT_SYMBOL(end_buffer_async_write); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_cont_expand_simple); diff --git a/fs/direct-io.c b/fs/direct-io.c index da258e7249c..05763bbc205 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, struct bio *bio; bio = bio_alloc(GFP_KERNEL, nr_vecs); - if (bio == NULL) - return -ENOMEM; bio->bi_bdev = bdev; bio->bi_sector = first_sector; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b43b9556366..acf67883110 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode, if (depth == 0) return (err); -reread: - partial = ext2_get_branch(inode, depth, offsets, chain, &err); + partial = ext2_get_branch(inode, depth, offsets, chain, &err); /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); @@ -602,15 +601,16 @@ reread: while (count < maxblocks && count <= blocks_to_boundary) { ext2_fsblk_t blk; - if (!verify_chain(chain, partial)) { + if (!verify_chain(chain, chain + depth - 1)) { /* * Indirect block might be removed by * truncate while we were reading it. * Handling of that case: forget what we've * got now, go to reread. */ + err = -EAGAIN; count = 0; - goto changed; + break; } blk = le32_to_cpu(*(chain[depth-1].p + count)); if (blk == first_block + count) @@ -618,7 +618,8 @@ reread: else break; } - goto got_it; + if (err != -EAGAIN) + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ @@ -626,6 +627,33 @@ reread: goto cleanup; mutex_lock(&ei->truncate_mutex); + /* + * If the indirect block is missing while we are reading + * the chain(ext3_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); + partial--; + } + partial = ext2_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + count++; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } + } /* * Okay, we need to do block allocation. Lazily initialize the block @@ -683,12 +711,6 @@ cleanup: partial--; } return err; -changed: - while (partial > chain) { - brelse(partial->bh); - partial--; - } - goto reread; } int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 466a332e0bd..fcfa2436185 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1521,12 +1521,16 @@ static int ext3_ordered_writepage(struct page *page, if (!page_has_buffers(page)) { create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); - } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { - /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ - return block_write_full_page(page, NULL, wbc); + page_bufs = page_buffers(page); + } else { + page_bufs = page_buffers(page); + if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE, + NULL, buffer_unmapped)) { + /* Provide NULL get_block() to catch bugs if buffers + * weren't really mapped */ + return block_write_full_page(page, NULL, wbc); + } } - page_bufs = page_buffers(page); - handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { @@ -1581,6 +1585,15 @@ static int ext3_writeback_writepage(struct page *page, if (ext3_journal_current_handle()) goto out_fail; + if (page_has_buffers(page)) { + if (!walk_page_buffers(NULL, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { + /* Provide NULL get_block() to catch bugs if buffers + * weren't really mapped */ + return block_write_full_page(page, NULL, wbc); + } + } + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ac77d8b8251..2a1cb097976 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -342,7 +342,7 @@ static int ext4_valid_extent_idx(struct inode *inode, ext4_fsblk_t block = idx_pblock(ext_idx); struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; if (unlikely(block < le32_to_cpu(es->s_first_data_block) || - (block > ext4_blocks_count(es)))) + (block >= ext4_blocks_count(es)))) return 0; else return 1; @@ -2416,8 +2416,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) len = ee_len; bio = bio_alloc(GFP_NOIO, len); - if (!bio) - return -ENOMEM; bio->bi_sector = ee_pblock; bio->bi_bdev = inode->i_sb->s_bdev; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a2e7952bc5f..c6bd6ced3bb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -372,16 +372,16 @@ static int ext4_block_to_path(struct inode *inode, } static int __ext4_check_blockref(const char *function, struct inode *inode, - unsigned int *p, unsigned int max) { + __le32 *p, unsigned int max) { unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); - unsigned int *bref = p; + __le32 *bref = p; while (bref < p+max) { - if (unlikely(*bref >= maxblocks)) { + if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { ext4_error(inode->i_sb, function, "block reference %u >= max (%u) " "in inode #%lu, offset=%d", - *bref, maxblocks, + le32_to_cpu(*bref), maxblocks, inode->i_ino, (int)(bref-p)); return -EIO; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9987bba99db..2958f4e6f22 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2508,6 +2508,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (EXT4_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext4; + /* check blocks count against device size */ + blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; + if (blocks_count && ext4_blocks_count(es) > blocks_count) { + printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " + "exceeds size of device (%llu blocks)\n", + ext4_blocks_count(es), blocks_count); + goto failed_mount; + } + /* * It makes no sense for the first data block to be beyond the end * of the filesystem. diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index d0a69ff2537..182f9ffe2b5 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -95,3 +95,6 @@ config FAT_DEFAULT_IOCHARSET Note that "utf8" is not recommended for FAT filesystems. If unsure, you shouldn't set "utf8" here. See <file:Documentation/filesystems/vfat.txt> for more information. + + Enable any character sets you need in File Systems/Native Language + Support. diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2b25133524a..06f30e96567 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -938,9 +938,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, - unsigned *nbytesp, int write) + size_t *nbytesp, int write) { - unsigned nbytes = *nbytesp; + size_t nbytes = *nbytesp; unsigned long user_addr = (unsigned long) buf; unsigned offset = user_addr & ~PAGE_MASK; int npages; @@ -955,7 +955,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return 0; } - nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); + nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); down_read(¤t->mm->mmap_sem); @@ -1298,6 +1298,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_flags & VM_MAYSHARE) return -ENODEV; + invalidate_inode_pages2(file->f_mapping); + return generic_file_mmap(file, vma); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3984e47d1d3..1afd9f26bcb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -597,7 +597,6 @@ __acquires(&gl->gl_spin) GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); - down_read(&gfs2_umount_flush_sem); if (test_bit(GLF_DEMOTE, &gl->gl_flags) && gl->gl_demote_state != gl->gl_state) { if (find_first_holder(gl)) @@ -614,15 +613,14 @@ __acquires(&gl->gl_spin) if (ret == 0) goto out_unlock; if (ret == 2) - goto out_sem; + goto out; gh = find_first_waiter(gl); gl->gl_target = gh->gh_state; if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) do_error(gl, 0); /* Fail queued try locks */ } do_xmote(gl, gh, gl->gl_target); -out_sem: - up_read(&gfs2_umount_flush_sem); +out: return; out_sched: @@ -631,7 +629,7 @@ out_sched: gfs2_glock_put(gl); out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - goto out_sem; + goto out; } static void glock_work_func(struct work_struct *work) @@ -641,6 +639,7 @@ static void glock_work_func(struct work_struct *work) if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) finish_xmote(gl, gl->gl_reply); + down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && @@ -653,6 +652,7 @@ static void glock_work_func(struct work_struct *work) } run_queue(gl, 0); spin_unlock(&gl->gl_spin); + up_read(&gfs2_umount_flush_sem); if (!delay || queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7b277d44915..5a31d426116 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -137,15 +137,15 @@ void gfs2_set_iop(struct inode *inode) if (S_ISREG(mode)) { inode->i_op = &gfs2_file_iops; if (gfs2_localflocks(sdp)) - inode->i_fop = gfs2_file_fops_nolock; + inode->i_fop = &gfs2_file_fops_nolock; else - inode->i_fop = gfs2_file_fops; + inode->i_fop = &gfs2_file_fops; } else if (S_ISDIR(mode)) { inode->i_op = &gfs2_dir_iops; if (gfs2_localflocks(sdp)) - inode->i_fop = gfs2_dir_fops_nolock; + inode->i_fop = &gfs2_dir_fops_nolock; else - inode->i_fop = gfs2_dir_fops; + inode->i_fop = &gfs2_dir_fops; } else if (S_ISLNK(mode)) { inode->i_op = &gfs2_symlink_iops; } else { diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index dca4fee3078..c30be2b6658 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -101,21 +101,23 @@ void gfs2_dinode_print(const struct gfs2_inode *ip); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; extern const struct inode_operations gfs2_symlink_iops; -extern const struct file_operations *gfs2_file_fops_nolock; -extern const struct file_operations *gfs2_dir_fops_nolock; +extern const struct file_operations gfs2_file_fops_nolock; +extern const struct file_operations gfs2_dir_fops_nolock; extern void gfs2_set_inode_flags(struct inode *inode); #ifdef CONFIG_GFS2_FS_LOCKING_DLM -extern const struct file_operations *gfs2_file_fops; -extern const struct file_operations *gfs2_dir_fops; +extern const struct file_operations gfs2_file_fops; +extern const struct file_operations gfs2_dir_fops; + static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_localflocks; } #else /* Single node only */ -#define gfs2_file_fops NULL -#define gfs2_dir_fops NULL +#define gfs2_file_fops gfs2_file_fops_nolock +#define gfs2_dir_fops gfs2_dir_fops_nolock + static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return 1; diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 70b9b854894..101caf3ee86 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -705,7 +705,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) } } -const struct file_operations *gfs2_file_fops = &(const struct file_operations){ +const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, @@ -723,7 +723,7 @@ const struct file_operations *gfs2_file_fops = &(const struct file_operations){ .setlease = gfs2_setlease, }; -const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ +const struct file_operations gfs2_dir_fops = { .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, @@ -735,7 +735,7 @@ const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ -const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){ +const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, @@ -751,7 +751,7 @@ const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operat .setlease = generic_setlease, }; -const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){ +const struct file_operations gfs2_dir_fops_nolock = { .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 51883b3ad89..650a730707b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) lock_page(page); bio = bio_alloc(GFP_NOFS, 1); - if (unlikely(!bio)) { - __free_page(page); - return -ENOBUFS; - } - bio->bi_sector = sector * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index abd5429ae28..1c70fa5168d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -371,6 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, ip = ghs[1].gh_gl->gl_object; ip->i_disksize = size; + i_size_write(inode, size); error = gfs2_meta_inode_buffer(ip, &dibh); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8d53f66b5bc..152e6c4a0dc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -81,7 +81,7 @@ struct gfs2_quota_change_host { static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); -static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(qd_lru_lock); int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) { @@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data) refrigerator(); t = min(quotad_timeo, statfs_timeo); - prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); spin_lock(&sdp->sd_trunc_lock); empty = list_empty(&sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9435dda8f1e..a1cbff2b4d9 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } + + if (!tree) + return 0; + if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 36ca2e1a4fa..7b6165f25fb 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb) if (HFS_SB(sb)->nls_disk) unload_nls(HFS_SB(sb)->nls_disk); + free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); sb->s_fs_info = NULL; } diff --git a/fs/inode.c b/fs/inode.c index d06d6d268de..6ad14a1cd8c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode) spin_lock(&inode_lock); } -/* - * We rarely want to lock two inodes that do not have a parent/child - * relationship (such as directory, child inode) simultaneously. The - * vast majority of file systems should be able to get along fine - * without this. Do not use these functions except as a last resort. - */ -void inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { - if (inode1) - mutex_lock(&inode1->i_mutex); - else if (inode2) - mutex_lock(&inode2->i_mutex); - return; - } - - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} -EXPORT_SYMBOL(inode_double_lock); - -void inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - if (inode1) - mutex_unlock(&inode1->i_mutex); - - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); -} -EXPORT_SYMBOL(inode_double_unlock); - static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c7bd649bbbd..3e9afc2a91d 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -55,6 +55,25 @@ * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. + * + * Locking rules: + * We keep two hash tables of revoke records. One hashtable belongs to the + * running transaction (is pointed to by journal->j_revoke), the other one + * belongs to the committing transaction. Accesses to the second hash table + * happen only from the kjournald and no other thread touches this table. Also + * journal_switch_revoke_table() which switches which hashtable belongs to the + * running and which to the committing transaction is called only from + * kjournald. Therefore we need no locks when accessing the hashtable belonging + * to the committing transaction. + * + * All users operating on the hash table belonging to the running transaction + * have a handle to the transaction. Therefore they are safe from kjournald + * switching hash tables under them. For operations on the lists of entries in + * the hash table j_revoke_lock is used. + * + * Finally, also replay code uses the hash tables but at this moment noone else + * can touch them (filesystem isn't mounted yet) and hence no locking is + * needed. */ #ifndef __KERNEL__ @@ -402,8 +421,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. - * - * The caller must have the journal locked. */ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -481,10 +498,7 @@ void journal_switch_revoke_table(journal_t *journal) /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. - * - * Called with the journal lock held. */ - void journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 24638e059bf..064279e33bb 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { .bpop_translate = NULL, }; +static struct lock_class_key nilfs_bmap_dat_lock_key; + /** * nilfs_bmap_read - read a bmap from an inode * @bmap: bmap @@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_pops = &nilfs_bmap_ptr_ops_p; bmap->b_last_allocated_key = 0; /* XXX: use macro */ bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: @@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) { memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); init_rwsem(&gcbmap->b_sem); + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; } @@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) { memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); init_rwsem(&bmap->b_sem); + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; } diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 7558c977db0..3d0c18a16db 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -35,11 +35,6 @@ #include "bmap_union.h" /* - * NILFS filesystem version - */ -#define NILFS_VERSION "2.0.5" - -/* * nilfs inode data in memory */ struct nilfs_inode_info { diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6ade0963fc1..4fc081e47d7 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, struct nilfs_segment_entry *ent, *n; struct inode *sufile = nilfs->ns_sufile; __u64 segnum[4]; - time_t mtime; int err; int i; @@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * Collecting segments written after the latest super root. * These are marked dirty to avoid being reallocated in the next write. */ - mtime = get_seconds(); list_for_each_entry_safe(ent, n, head, list) { - if (ent->segnum == segnum[0]) { - list_del(&ent->list); - nilfs_free_segment_entry(ent); - continue; - } - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - if (!nilfs_segment_usage_dirty(ent->raw_su)) { - /* make the segment garbage */ - ent->raw_su->su_nblocks = cpu_to_le32(0); - ent->raw_su->su_lastmod = cpu_to_le32(mtime); - nilfs_segment_usage_set_dirty(ent->raw_su); + if (ent->segnum != segnum[0]) { + err = nilfs_sufile_scrap(sufile, ent->segnum); + if (unlikely(err)) + goto failed; } list_del(&ent->list); - nilfs_close_segment_entry(ent, sufile); nilfs_free_segment_entry(ent); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index c774cf397e2..98e68677f04 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, create, NULL, bhp); } +static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, + u64 ncleanadd, u64 ndirtyadd) +{ + struct nilfs_sufile_header *header; + void *kaddr; + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + le64_add_cpu(&header->sh_ncleansegs, ncleanadd); + le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(header_bh); +} + +int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)) +{ + struct buffer_head *header_bh, *bh; + int ret; + + if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING "%s: invalid segment number: %llu\n", + __func__, (unsigned long long)segnum); + return -EINVAL; + } + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh); + if (!ret) { + dofunc(sufile, segnum, header_bh, bh); + brelse(bh); + } + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file @@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { struct buffer_head *header_bh, *su_bh; - struct the_nilfs *nilfs; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; size_t susz = NILFS_MDT(sufile)->mi_entry_size; @@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) down_write(&NILFS_MDT(sufile)->mi_sem); - nilfs = NILFS_MDT(sufile)->mi_nilfs; - ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; @@ -192,165 +235,84 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) return ret; } -/** - * nilfs_sufile_cancel_free - - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, *su_bh; - struct the_nilfs *nilfs; - struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; void *kaddr; - int ret; - - down_write(&NILFS_MDT(sufile)->mi_sem); - - nilfs = NILFS_MDT(sufile)->mi_nilfs; - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_sem; - - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); - if (ret < 0) - goto out_header; kaddr = kmap_atomic(su_bh->b_page, KM_USER0); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); kunmap_atomic(kaddr, KM_USER0); - goto out_su_bh; + return; } nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ncleansegs, -1); - le64_add_cpu(&header->sh_ndirtysegs, 1); - kunmap_atomic(kaddr, KM_USER0); - - nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_sufile_mod_counter(header_bh, -1, 1); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); - - out_su_bh: - brelse(su_bh); - out_header: - brelse(header_bh); - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; } -/** - * nilfs_sufile_freev - free segments - * @sufile: inode of segment usage file - * @segnum: array of segment numbers - * @nsegs: number of segments - * - * Description: nilfs_sufile_freev() frees segments specified by @segnum and - * @nsegs, which must have been returned by a previous call to - * nilfs_sufile_alloc(). - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -#define NILFS_SUFILE_FREEV_PREALLOC 16 -int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) +void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, **su_bh, - *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC]; - struct the_nilfs *nilfs; - struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; void *kaddr; - int ret, i; + int clean, dirty; - down_write(&NILFS_MDT(sufile)->mi_sem); - - nilfs = NILFS_MDT(sufile)->mi_nilfs; - - /* prepare resources */ - if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC) - su_bh = su_bh_prealloc; - else { - su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS); - if (su_bh == NULL) { - ret = -ENOMEM; - goto out_sem; - } - } - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_su_bh; - for (i = 0; i < nsegs; i++) { - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i], - 0, &su_bh[i]); - if (ret < 0) - goto out_bh; - } - - /* free segments */ - for (i = 0; i < nsegs; i++) { - kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum[i], su_bh[i], kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - nilfs_segment_usage_set_clean(su); + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && + su->su_nblocks == cpu_to_le32(0)) { kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(su_bh[i]); + return; } - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ncleansegs, nsegs); - le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); + clean = nilfs_segment_usage_clean(su); + dirty = nilfs_segment_usage_dirty(su); + + /* make the segment garbage */ + su->su_lastmod = cpu_to_le64(0); + su->su_nblocks = cpu_to_le32(0); + su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + + nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); + nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); +} - out_bh: - for (i--; i >= 0; i--) - brelse(su_bh[i]); - brelse(header_bh); +void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) +{ + struct nilfs_segment_usage *su; + void *kaddr; + int sudirty; - out_su_bh: - if (su_bh != su_bh_prealloc) - kfree(su_bh); + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (nilfs_segment_usage_clean(su)) { + printk(KERN_WARNING "%s: segment %llu is already clean\n", + __func__, (unsigned long long)segnum); + kunmap_atomic(kaddr, KM_USER0); + return; + } + WARN_ON(nilfs_segment_usage_error(su)); + WARN_ON(!nilfs_segment_usage_dirty(su)); - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; -} + sudirty = nilfs_segment_usage_dirty(su); + nilfs_segment_usage_set_clean(su); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(su_bh); -/** - * nilfs_sufile_free - - * @sufile: - * @segnum: - */ -int nilfs_sufile_free(struct inode *sufile, __u64 segnum) -{ - return nilfs_sufile_freev(sufile, &segnum, 1); + nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); + nilfs_mdt_mark_dirty(sufile); } /** @@ -500,72 +462,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) return ret; } -/** - * nilfs_sufile_set_error - mark a segment as erroneous - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: nilfs_sufile_set_error() marks the segment specified by - * @segnum as erroneous. The error segment will never be used again. - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. - */ -int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, *su_bh; struct nilfs_segment_usage *su; - struct nilfs_sufile_header *header; void *kaddr; - int ret; - - if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { - printk(KERN_WARNING "%s: invalid segment number: %llu\n", - __func__, (unsigned long long)segnum); - return -EINVAL; - } - down_write(&NILFS_MDT(sufile)->mi_sem); - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_sem; - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); - if (ret < 0) - goto out_header; + int suclean; kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { kunmap_atomic(kaddr, KM_USER0); - brelse(su_bh); - goto out_header; + return; } - + suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); kunmap_atomic(kaddr, KM_USER0); - brelse(su_bh); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ndirtysegs, -1); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + if (suclean) + nilfs_sufile_mod_counter(header_bh, -1, 0); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); - brelse(su_bh); - - out_header: - brelse(header_bh); - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; } /** @@ -625,7 +543,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); si[i + j].sui_flags = le32_to_cpu(su->su_flags) & ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); - if (nilfs_segment_is_active(nilfs, segnum + i + j)) + if (nilfs_segment_is_active(nilfs, segnum + j)) si[i + j].sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index d595f33a768..a2e2efd4ade 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) } int nilfs_sufile_alloc(struct inode *, __u64 *); -int nilfs_sufile_cancel_free(struct inode *, __u64); -int nilfs_sufile_freev(struct inode *, __u64 *, size_t); -int nilfs_sufile_free(struct inode *, __u64); int nilfs_sufile_get_segment_usage(struct inode *, __u64, struct nilfs_segment_usage **, struct buffer_head **); @@ -46,9 +43,83 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); -int nilfs_sufile_set_error(struct inode *, __u64); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, size_t); +int nilfs_sufile_update(struct inode *, __u64, int, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)); +void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); + +/** + * nilfs_sufile_cancel_free - + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, + nilfs_sufile_do_cancel_free); +} + +/** + * nilfs_sufile_scrap - make a segment garbage + * @sufile: inode of segment usage file + * @segnum: segment number to be freed + */ +static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap); +} + +/** + * nilfs_sufile_free - free segment + * @sufile: inode of segment usage file + * @segnum: segment number to be freed + */ +static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free); +} + +/** + * nilfs_sufile_set_error - mark a segment as erroneous + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: nilfs_sufile_set_error() marks the segment specified by + * @segnum as erroneous. The error segment will never be used again. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid segment usage number. + */ +static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, + nilfs_sufile_do_set_error); +} #endif /* _NILFS_SUFILE_H */ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e117e1ea9bf..6989b03e97a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -63,7 +63,6 @@ MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); -MODULE_VERSION(NILFS_VERSION); MODULE_LICENSE("GPL"); static int nilfs_remount(struct super_block *sb, int *flags, char *data); @@ -476,11 +475,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; - struct the_nilfs *nilfs = sbi->s_nilfs; int err; /* @@ -514,6 +514,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = atomic_read(&sbi->s_inodes_count); buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ buf->f_namelen = NILFS_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + return 0; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 33400cf0bbe..7f65b3be4aa 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs) static int nilfs_load_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, sector_t sr_block) { + static struct lock_class_key dat_lock_key; struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; @@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, if (unlikely(err)) goto failed_sufile; + lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); + lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); + nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, sizeof(struct nilfs_cpfile_header)); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8672b953603..c2a87c885b7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1912,6 +1912,22 @@ out_sems: return written ? written : ret; } +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, + struct file *out, + struct splice_desc *sd) +{ + int ret; + + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, + sd->total_len, 0, NULL); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return splice_from_pipe_feed(pipe, sd, pipe_to_file); +} + static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, unsigned int flags) { int ret; - struct inode *inode = out->f_path.dentry->d_inode; + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, - NULL); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) + mlog_errno(ret); + else { + ret = ocfs2_splice_to_file(pipe, out, &sd); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); - if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); -out_unlock: - ocfs2_rw_unlock(inode, 1); -out: - mutex_unlock(&inode->i_mutex); + if (sd.num_spliced) + ret = sd.num_spliced; + + if (ret > 0) { + unsigned long nr_pages; + + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + mutex_lock(&inode->i_mutex); + err = ocfs2_rw_lock(inode, 1); + if (err < 0) { + mlog_errno(err); + } else { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + + if (err) + ret = err; + } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + } mlog_exit(ret); return ret; diff --git a/fs/pipe.c b/fs/pipe.c index 4af7aa52181..13414ec45b8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -37,6 +37,42 @@ * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ +static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) +{ + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, subclass); +} + +void pipe_lock(struct pipe_inode_info *pipe) +{ + /* + * pipe_lock() nests non-pipe inode locks (for writing to a file) + */ + pipe_lock_nested(pipe, I_MUTEX_PARENT); +} +EXPORT_SYMBOL(pipe_lock); + +void pipe_unlock(struct pipe_inode_info *pipe) +{ + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); +} +EXPORT_SYMBOL(pipe_unlock); + +void pipe_double_lock(struct pipe_inode_info *pipe1, + struct pipe_inode_info *pipe2) +{ + BUG_ON(pipe1 == pipe2); + + if (pipe1 < pipe2) { + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + } else { + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + } +} + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { @@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe) * is considered a noninteractive wait: */ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); schedule(); finish_wait(&pipe->wait, &wait); - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); } static int diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 12c20377772..64a72e2e765 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -135,7 +135,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) struct inode *inode = vma->vm_file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; - pgoff = (loff_t)vma->pg_off << PAGE_SHIFT; + pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; } seq_printf(m, diff --git a/fs/splice.c b/fs/splice.c index c18aa7e03e2..666953d59a3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, do_wakeup = 0; page_nr = 0; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); for (;;) { if (!pipe->readers) { @@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) { - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } while (page_nr < spd_pages) @@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. */ -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; @@ -600,108 +597,177 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, out: return ret; } +EXPORT_SYMBOL(pipe_to_file); + +static void wakeup_pipe_writers(struct pipe_inode_info *pipe) +{ + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); +} /** - * __splice_from_pipe - splice data from a pipe to given actor + * splice_from_pipe_feed - feed available data from a pipe to a file * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: - * This function does little more than loop over the pipe and call - * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or - * pipe_to_user. + * This function loops over the pipe and calls @actor to do the + * actual moving of a single struct pipe_buffer to the desired + * destination. It returns when there's no more buffers left in + * the pipe or if the requested number of bytes (@sd->total_len) + * have been copied. It returns a positive number (one) if the + * pipe needs to be filled with more data, zero if the required + * number of bytes have been copied and -errno on error. * + * This, together with splice_from_pipe_{begin,end,next}, may be + * used to implement the functionality of __splice_from_pipe() when + * locking is required around copying the pipe buffers to the + * destination. */ -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, - splice_actor *actor) +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) { - int ret, do_wakeup, err; - - ret = 0; - do_wakeup = 0; - - for (;;) { - if (pipe->nrbufs) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - const struct pipe_buf_operations *ops = buf->ops; + int ret; - sd->len = buf->len; - if (sd->len > sd->total_len) - sd->len = sd->total_len; + while (pipe->nrbufs) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + const struct pipe_buf_operations *ops = buf->ops; - err = actor(pipe, buf, sd); - if (err <= 0) { - if (!ret && err != -ENODATA) - ret = err; + sd->len = buf->len; + if (sd->len > sd->total_len) + sd->len = sd->total_len; - break; - } + ret = actor(pipe, buf, sd); + if (ret <= 0) { + if (ret == -ENODATA) + ret = 0; + return ret; + } + buf->offset += ret; + buf->len -= ret; - ret += err; - buf->offset += err; - buf->len -= err; + sd->num_spliced += ret; + sd->len -= ret; + sd->pos += ret; + sd->total_len -= ret; - sd->len -= err; - sd->pos += err; - sd->total_len -= err; - if (sd->len) - continue; + if (!buf->len) { + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->nrbufs--; + if (pipe->inode) + sd->need_wakeup = true; + } - if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); - pipe->nrbufs--; - if (pipe->inode) - do_wakeup = 1; - } + if (!sd->total_len) + return 0; + } - if (!sd->total_len) - break; - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_feed); - if (pipe->nrbufs) - continue; +/** + * splice_from_pipe_next - wait for some data to splice from + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wait for some data and return a positive + * value (one) if pipe buffers are available. It will return zero + * or -errno if no more data needs to be spliced. + */ +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + while (!pipe->nrbufs) { if (!pipe->writers) - break; - if (!pipe->waiting_writers) { - if (ret) - break; - } + return 0; - if (sd->flags & SPLICE_F_NONBLOCK) { - if (!ret) - ret = -EAGAIN; - break; - } + if (!pipe->waiting_writers && sd->num_spliced) + return 0; - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } + if (sd->flags & SPLICE_F_NONBLOCK) + return -EAGAIN; - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - do_wakeup = 0; + if (signal_pending(current)) + return -ERESTARTSYS; + + if (sd->need_wakeup) { + wakeup_pipe_writers(pipe); + sd->need_wakeup = false; } pipe_wait(pipe); } - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_next); - return ret; +/** + * splice_from_pipe_begin - start splicing from pipe + * @sd: information about the splice operation + * + * Description: + * This function should be called before a loop containing + * splice_from_pipe_next() and splice_from_pipe_feed() to + * initialize the necessary fields of @sd. + */ +void splice_from_pipe_begin(struct splice_desc *sd) +{ + sd->num_spliced = 0; + sd->need_wakeup = false; +} +EXPORT_SYMBOL(splice_from_pipe_begin); + +/** + * splice_from_pipe_end - finish splicing from pipe + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wake up pipe writers if necessary. It should + * be called after a loop containing splice_from_pipe_next() and + * splice_from_pipe_feed(). + */ +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + if (sd->need_wakeup) + wakeup_pipe_writers(pipe); +} +EXPORT_SYMBOL(splice_from_pipe_end); + +/** + * __splice_from_pipe - splice data from a pipe to given actor + * @pipe: pipe to splice from + * @sd: information to @actor + * @actor: handler that splices the data + * + * Description: + * This function does little more than loop over the pipe and call + * @actor to do the actual moving of a single struct pipe_buffer to + * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * pipe_to_user. + * + */ +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) +{ + int ret; + + splice_from_pipe_begin(sd); + do { + ret = splice_from_pipe_next(pipe, sd); + if (ret > 0) + ret = splice_from_pipe_feed(pipe, sd, actor); + } while (ret > 0); + splice_from_pipe_end(pipe, sd); + + return sd->num_spliced ? sd->num_spliced : ret; } EXPORT_SYMBOL(__splice_from_pipe); @@ -715,7 +781,7 @@ EXPORT_SYMBOL(__splice_from_pipe); * @actor: handler that splices the data * * Description: - * See __splice_from_pipe. This function locks the input and output inodes, + * See __splice_from_pipe. This function locks the pipe inode, * otherwise it's identical to __splice_from_pipe(). * */ @@ -724,7 +790,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, splice_actor *actor) { ssize_t ret; - struct inode *inode = out->f_mapping->host; struct splice_desc sd = { .total_len = len, .flags = flags, @@ -732,30 +797,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - /* - * The actor worker might be calling ->write_begin and - * ->write_end. Most of the time, these expect i_mutex to - * be held. Since this may result in an ABBA deadlock with - * pipe->inode, we have to order lock acquiry here. - * - * Outer lock must be inode->i_mutex, as pipe_wait() will - * release and reacquire pipe->inode->i_mutex, AND inode must - * never be a pipe. - */ - WARN_ON(S_ISFIFO(inode->i_mode)); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); + pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - mutex_unlock(&inode->i_mutex); + pipe_unlock(pipe); return ret; } /** - * generic_file_splice_write_nolock - generic_file_splice_write without mutexes + * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info * @out: file to write to * @ppos: position in @out @@ -764,13 +814,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, * * Description: * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. The caller is responsible - * for acquiring i_mutex on both inodes. + * the given pipe inode to the given file. * */ ssize_t -generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; @@ -781,76 +830,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; ssize_t ret; - int err; - - err = file_remove_suid(out); - if (unlikely(err)) - return err; - - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (ret > 0) { - unsigned long nr_pages; - *ppos += ret; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - /* - * If file or inode is SYNC and we actually wrote some data, - * sync it. - */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); + pipe_lock(pipe); - if (err) - ret = err; - } - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; - return ret; -} + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = file_remove_suid(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); -EXPORT_SYMBOL(generic_file_splice_write_nolock); + pipe_unlock(pipe); -/** - * generic_file_splice_write - splice data from a pipe to a file - * @pipe: pipe info - * @out: file to write to - * @ppos: position in @out - * @len: number of bytes to splice - * @flags: splice modifier flags - * - * Description: - * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. - * - */ -ssize_t -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - ssize_t ret; + if (sd.num_spliced) + ret = sd.num_spliced; - WARN_ON(S_ISFIFO(inode->i_mode)); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - ret = file_remove_suid(out); - if (likely(!ret)) { - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - } - mutex_unlock(&inode->i_mutex); if (ret > 0) { unsigned long nr_pages; @@ -1339,8 +1340,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, if (!pipe) return -EBADF; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); error = ret = 0; while (nr_segs) { @@ -1395,8 +1395,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, iov++; } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (!ret) ret = error; @@ -1524,7 +1523,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (!pipe->nrbufs) { if (signal_pending(current)) { @@ -1542,7 +1541,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe_wait(pipe); } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1562,7 +1561,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (pipe->nrbufs >= PIPE_BUFFERS) { if (!pipe->readers) { @@ -1583,7 +1582,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe->waiting_writers--; } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1599,10 +1598,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * Potential ABBA deadlock, work around it by ordering lock - * grabbing by inode address. Otherwise two different processes + * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - inode_double_lock(ipipe->inode, opipe->inode); + pipe_double_lock(ipipe, opipe); do { if (!opipe->readers) { @@ -1653,7 +1652,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ret = -EAGAIN; - inode_double_unlock(ipipe->inode, opipe->inode); + pipe_unlock(ipipe); + pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c13f67300fe..7ec89fc05b2 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -153,23 +153,6 @@ xfs_find_bdev_for_inode( } /* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. - */ -STATIC void -xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - queue_work(xfsdatad_workqueue, &ioend->io_work); - if (wait) - flush_workqueue(xfsdatad_workqueue); - } -} - -/* * We're now finished for good with this ioend structure. * Update the page state via the associated buffer_heads, * release holds on the inode and bio, and finally free @@ -310,6 +293,27 @@ xfs_end_bio_read( } /* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend, + int wait) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + struct workqueue_struct *wq = xfsdatad_workqueue; + if (ioend->io_work.func == xfs_end_bio_unwritten) + wq = xfsconvertd_workqueue; + + queue_work(wq, &ioend->io_work); + if (wait) + flush_workqueue(wq); + } +} + +/* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. * We'll need to extend this for updating the ondisk inode size later diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 1dd52884975..221b3e66cee 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -19,6 +19,7 @@ #define __XFS_AOPS_H__ extern struct workqueue_struct *xfsdatad_workqueue; +extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; /* diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index aa1016bb913..e28800a9f2b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = { static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsconvertd_workqueue; #ifdef XFS_BUF_TRACE void @@ -1775,6 +1776,7 @@ xfs_flush_buftarg( xfs_buf_t *bp, *n; int pincount = 0; + xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); @@ -1831,9 +1833,15 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; + xfsconvertd_workqueue = create_workqueue("xfsconvertd"); + if (!xfsconvertd_workqueue) + goto out_destroy_xfsdatad_workqueue; + register_shrinker(&xfs_buf_shake); return 0; + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: @@ -1849,6 +1857,7 @@ void xfs_buf_terminate(void) { unregister_shrinker(&xfs_buf_shake); + destroy_workqueue(xfsconvertd_workqueue); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 5aeb7777696..08be36d7326 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -74,14 +74,14 @@ xfs_flush_pages( if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = filemap_fdatawrite(mapping); - if (flags & XFS_B_ASYNC) - return -ret; - ret2 = filemap_fdatawait(mapping); - if (!ret) - ret = ret2; + ret = -filemap_fdatawrite(mapping); } - return -ret; + if (flags & XFS_B_ASYNC) + return ret; + ret2 = xfs_wait_on_pages(ip, first, last); + if (!ret) + ret = ret2; + return ret; } int diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7e90daa0d1d..9142192ccbe 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -751,10 +751,26 @@ start: goto relock; } } else { + int enospc = 0; + ssize_t ret2 = 0; + +write_retry: xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); - ret = generic_file_buffered_write(iocb, iovp, segs, + ret2 = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we + * aren't holding any page locks and retry *once* + */ + if (ret2 == -ENOSPC && !enospc) { + error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); + if (error) + goto out_unlock_internal; + enospc = 1; + goto write_retry; + } + ret = ret2; } current->backing_dev_info = NULL; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a608e72fa40..f7ba76633c2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -62,12 +62,6 @@ xfs_sync_inodes_ag( uint32_t first_index = 0; int error = 0; int last_error = 0; - int fflag = XFS_B_ASYNC; - - if (flags & SYNC_DELWRI) - fflag = XFS_B_DELWRI; - if (flags & SYNC_WAIT) - fflag = 0; /* synchronous overrides all */ do { struct inode *inode; @@ -128,11 +122,23 @@ xfs_sync_inodes_ag( * If we have to flush data or wait for I/O completion * we need to hold the iolock. */ - if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { - xfs_ilock(ip, XFS_IOLOCK_SHARED); - lock_flags |= XFS_IOLOCK_SHARED; - error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); - if (flags & SYNC_IOWAIT) + if (flags & SYNC_DELWRI) { + if (VN_DIRTY(inode)) { + if (flags & SYNC_TRYLOCK) { + if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) + lock_flags |= XFS_IOLOCK_SHARED; + } else { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + } + if (lock_flags & XFS_IOLOCK_SHARED) { + error = xfs_flush_pages(ip, 0, -1, + (flags & SYNC_WAIT) ? 0 + : XFS_B_ASYNC, + FI_NONE); + } + } + if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -398,15 +404,17 @@ STATIC void xfs_syncd_queue_work( struct xfs_mount *mp, void *data, - void (*syncer)(struct xfs_mount *, void *)) + void (*syncer)(struct xfs_mount *, void *), + struct completion *completion) { - struct bhv_vfs_sync_work *work; + struct xfs_sync_work *work; - work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); + work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); INIT_LIST_HEAD(&work->w_list); work->w_syncer = syncer; work->w_data = data; work->w_mount = mp; + work->w_completion = completion; spin_lock(&mp->m_sync_lock); list_add_tail(&work->w_list, &mp->m_sync_list); spin_unlock(&mp->m_sync_lock); @@ -420,49 +428,26 @@ xfs_syncd_queue_work( * heads, looking about for more room... */ STATIC void -xfs_flush_inode_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - filemap_flush(inode->i_mapping); - iput(inode); -} - -void -xfs_flush_inode( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); - delay(msecs_to_jiffies(500)); -} - -/* - * This is the "bigger hammer" version of xfs_flush_inode_work... - * (IOW, "If at first you don't succeed, use a Bigger Hammer"). - */ -STATIC void -xfs_flush_device_work( +xfs_flush_inodes_work( struct xfs_mount *mp, void *arg) { struct inode *inode = arg; - sync_blockdev(mp->m_super->s_bdev); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); iput(inode); } void -xfs_flush_device( +xfs_flush_inodes( xfs_inode_t *ip) { struct inode *inode = VFS_I(ip); + DECLARE_COMPLETION_ONSTACK(completion); igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); - delay(msecs_to_jiffies(500)); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); + wait_for_completion(&completion); xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } @@ -497,7 +482,7 @@ xfssyncd( { struct xfs_mount *mp = arg; long timeleft; - bhv_vfs_sync_work_t *work, *n; + xfs_sync_work_t *work, *n; LIST_HEAD (tmp); set_freezable(); @@ -532,6 +517,8 @@ xfssyncd( list_del(&work->w_list); if (work == &mp->m_sync_work) continue; + if (work->w_completion) + complete(work->w_completion); kmem_free(work); } } @@ -545,6 +532,7 @@ xfs_syncd_init( { mp->m_sync_work.w_syncer = xfs_sync_worker; mp->m_sync_work.w_mount = mp; + mp->m_sync_work.w_completion = NULL; mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); if (IS_ERR(mp->m_sync_task)) return -PTR_ERR(mp->m_sync_task); diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 04f058c848a..308d5bf6dfb 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -21,18 +21,20 @@ struct xfs_mount; struct xfs_perag; -typedef struct bhv_vfs_sync_work { +typedef struct xfs_sync_work { struct list_head w_list; struct xfs_mount *w_mount; void *w_data; /* syncer routine argument */ void (*w_syncer)(struct xfs_mount *, void *); -} bhv_vfs_sync_work_t; + struct completion *w_completion; +} xfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ #define SYNC_DELWRI 0x0002 /* look at delayed writes */ #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ +#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); @@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); -void xfs_flush_inode(struct xfs_inode *ip); -void xfs_flush_device(struct xfs_inode *ip); +void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 478e587087f..89b81eedce6 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -69,15 +69,6 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - /* - * initialise the VFS inode here to get failures - * out of the way early. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_zone_free(xfs_inode_zone, ip); - return NULL; - } - /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; @@ -113,6 +104,20 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif + /* + * Now initialise the VFS inode. We do this after the xfs_inode + * initialisation as internal failures will result in ->destroy_inode + * being called and that will pass down through the reclaim path and + * free the XFS inode. This path requires the XFS inode to already be + * initialised. Hence if this call fails, the xfs_inode has already + * been freed and we should not reference it at all in the error + * handling. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) + return NULL; + + /* prevent anyone from using this yet */ + VFS_I(ip)->i_state = I_NEW|I_LOCK; return ip; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 08ce72316bf..5aaa2d7ec15 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb( } STATIC int -xfs_flush_space( - xfs_inode_t *ip, - int *fsynced, - int *ioflags) -{ - switch (*fsynced) { - case 0: - if (ip->i_delayed_blks) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inode(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 1; - } else { - *ioflags |= BMAPI_SYNC; - *fsynced = 2; - } - return 0; - case 1: - *fsynced = 2; - *ioflags |= BMAPI_SYNC; - return 0; - case 2: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_device(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 3; - return 0; - } - return 1; -} - -STATIC int xfs_cmn_err_fsblock_zero( xfs_inode_t *ip, xfs_bmbt_irec_t *imap) @@ -538,15 +506,9 @@ error_out: } /* - * If the caller is doing a write at the end of the file, - * then extend the allocation out to the file system's write - * iosize. We clean up any extra space left over when the - * file is closed in xfs_inactive(). - * - * For sync writes, we are flushing delayed allocate space to - * try to make additional space available for allocation near - * the filesystem full boundary - preallocation hurts in that - * situation, of course. + * If the caller is doing a write at the end of the file, then extend the + * allocation out to the file system's write iosize. We clean up any extra + * space left over when the file is closed in xfs_inactive(). */ STATIC int xfs_iomap_eof_want_preallocate( @@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate( int n, error, imaps; *prealloc = 0; - if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) + if ((offset + count) <= ip->i_size) return 0; /* @@ -611,7 +573,7 @@ xfs_iomap_write_delay( xfs_extlen_t extsz; int nimaps; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int prealloc, fsynced = 0; + int prealloc, flushed = 0; int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -627,12 +589,12 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); -retry: error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; +retry: if (prealloc) { aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); @@ -659,15 +621,22 @@ retry: /* * If bmapi returned us nothing, and if we didn't get back EDQUOT, - * then we must have run out of space - flush delalloc, and retry.. + * then we must have run out of space - flush all other inodes with + * delalloc blocks and retry without EOF preallocation. */ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, ip, offset, count); - if (xfs_flush_space(ip, &fsynced, &ioflag)) + if (flushed) return XFS_ERROR(ENOSPC); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inodes(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + + flushed = 1; error = 0; + prealloc = 0; goto retry; } diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index a1cc1322fc0..fdcf7b82747 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -40,8 +40,7 @@ typedef enum { BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ - BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ - BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ + BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ } bmapi_flags_t; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f76c6d7cea2..3750f04ede0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -562,9 +562,8 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); - if (!mp->m_log) { - cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); - error = ENOMEM; + if (IS_ERR(mp->m_log)) { + error = -PTR_ERR(mp->m_log); goto out; } @@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp, xfs_buf_t *bp; int i; int iclogsize; + int error = ENOMEM; log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); - if (!log) - return NULL; + if (!log) { + xlog_warn("XFS: Log allocation failed: No memory!"); + goto out; + } log->l_mp = mp; log->l_targ = log_target; @@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_grant_reserve_cycle = 1; log->l_grant_write_cycle = 1; + error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; - ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); + if (log->l_sectbb_log < 0 || + log->l_sectbb_log > mp->m_sectbb_log) { + xlog_warn("XFS: Log sector size (0x%x) out of range.", + log->l_sectbb_log); + goto out_free_log; + } + /* for larger sector sizes, must have v2 or external log */ - ASSERT(log->l_sectbb_log == 0 || - log->l_logBBstart == 0 || - xfs_sb_version_haslogv2(&mp->m_sb)); - ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); + if (log->l_sectbb_log != 0 && + (log->l_logBBstart != 0 && + !xfs_sb_version_haslogv2(&mp->m_sb))) { + xlog_warn("XFS: log sector size (0x%x) invalid " + "for configuration.", log->l_sectbb_log); + goto out_free_log; + } + if (mp->m_sb.sb_logsectlog < BBSHIFT) { + xlog_warn("XFS: Log sector log (0x%x) too small.", + mp->m_sb.sb_logsectlog); + goto out_free_log; + } } log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; xlog_get_iclog_buffer_size(mp, log); + error = ENOMEM; bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; @@ -1313,7 +1331,8 @@ out_free_iclog: xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); - return NULL; +out: + return ERR_PTR(-error); } /* xlog_alloc_log */ @@ -2541,18 +2560,19 @@ redo: xlog_ins_ticketq(&log->l_reserve_headq, tic); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: sleep 2"); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); + XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_lock(&log->l_grant_lock); + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - } xlog_trace_loggrant(log, tic, "xlog_grant_log_space: wake 2"); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log, * for more free space, otherwise try to get some space for * this transaction. */ - + need_bytes = tic->t_unit_res; if ((ntic = log->l_write_headq)) { free_bytes = xlog_space_left(log, log->l_grant_write_cycle, log->l_grant_write_bytes); @@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: sleep 1"); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); + XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already * off the queue */ - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_lock(&log->l_grant_lock); + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); - xlog_grant_push_ail(log->l_mp, tic->t_unit_res); - spin_lock(&log->l_grant_lock); } } - need_bytes = tic->t_unit_res; - redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; @@ -2680,19 +2699,20 @@ redo: if (free_bytes < need_bytes) { if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); + XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_lock(&log->l_grant_lock); + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7af44adffc8..d6a64392f98 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -313,7 +313,7 @@ typedef struct xfs_mount { #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct task_struct *m_sync_task; /* generalised sync thread */ - bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ + xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ struct list_head m_sync_list; /* sync thread work item list */ spinlock_t m_sync_lock; /* work item list lock */ int m_sync_seq; /* sync thread generation no. */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7394c7af5de..19cf90a9c76 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1457,6 +1457,13 @@ xfs_create( error = xfs_trans_reserve(tp, resblks, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); if (error == ENOSPC) { + /* flush outstanding delalloc blocks and retry */ + xfs_flush_inodes(dp); + error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + } + if (error == ENOSPC) { + /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); |