aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/netdevices.c3
-rw-r--r--fs/befs/super.c1
-rw-r--r--fs/bio.c18
-rw-r--r--fs/buffer.c69
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ext2/inode.c44
-rw-r--r--fs/ext3/inode.c23
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/inode.c8
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/Kconfig3
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/inode.c8
-rw-r--r--fs/gfs2/inode.h14
-rw-r--r--fs/gfs2/ops_file.c8
-rw-r--r--fs/gfs2/ops_fstype.c5
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/mdb.c1
-rw-r--r--fs/inode.c36
-rw-r--r--fs/jbd/revoke.c24
-rw-r--r--fs/nilfs2/bmap.c5
-rw-r--r--fs/nilfs2/nilfs.h5
-rw-r--r--fs/nilfs2/recovery.c20
-rw-r--r--fs/nilfs2/sufile.c290
-rw-r--r--fs/nilfs2/sufile.h79
-rw-r--r--fs/nilfs2/super.c7
-rw-r--r--fs/nilfs2/the_nilfs.c4
-rw-r--r--fs/ocfs2/file.c94
-rw-r--r--fs/pipe.c42
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/splice.c370
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c38
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c78
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h9
-rw-r--r--fs/xfs/xfs_iget.c23
-rw-r--r--fs/xfs/xfs_iomap.c61
-rw-r--r--fs/xfs/xfs_iomap.h3
-rw-r--r--fs/xfs/xfs_log.c78
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c7
48 files changed, 873 insertions, 695 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7a1d942ef68..0149dab365e 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -102,6 +102,7 @@ int afs_release(struct inode *inode, struct file *file)
return 0;
}
+#ifdef CONFIG_AFS_FSCACHE
/*
* deal with notification that a page was read from the cache
*/
@@ -117,6 +118,7 @@ static void afs_file_readpage_read_complete(struct page *page,
SetPageUptodate(page);
unlock_page(page);
}
+#endif
/*
* AFS read page from file, directory or symlink
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
index 49f18942306..7ad36506c25 100644
--- a/fs/afs/netdevices.c
+++ b/fs/afs/netdevices.c
@@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen)
struct net_device *dev;
int ret = -ENODEV;
- if (maclen != ETH_ALEN)
- BUG();
+ BUG_ON(maclen != ETH_ALEN);
rtnl_lock();
dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
diff --git a/fs/befs/super.c b/fs/befs/super.c
index 41f2b4d0093..ca40f828f64 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -8,6 +8,7 @@
*/
#include <linux/fs.h>
+#include <asm/page.h> /* for PAGE_SIZE */
#include "befs.h"
#include "super.h"
diff --git a/fs/bio.c b/fs/bio.c
index e0c9e545bbf..cd42bb882f3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -348,6 +348,24 @@ err:
return NULL;
}
+/**
+ * bio_alloc - allocate a bio for I/O
+ * @gfp_mask: the GFP_ mask given to the slab allocator
+ * @nr_iovecs: number of iovecs to pre-allocate
+ *
+ * Description:
+ * bio_alloc will allocate a bio and associated bio_vec array that can hold
+ * at least @nr_iovecs entries. Allocations will be done from the
+ * fs_bio_set. Also see @bio_alloc_bioset.
+ *
+ * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
+ * a bio. This is due to the mempool guarantees. To make this work, callers
+ * must never allocate more than 1 bio at the time from this pool. Callers
+ * that need to allocate more than 1 bio must always submit the previously
+ * allocate bio for IO before attempting to allocate a new one. Failure to
+ * do so can cause livelocks under memory pressure.
+ *
+ **/
struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
{
struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
diff --git a/fs/buffer.c b/fs/buffer.c
index 6e35762b616..b3e5be7514f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,7 +360,7 @@ still_busy:
* Completion handler for block_write_full_page() - pages which are unlocked
* during I/O, and which have PageWriteback cleared upon I/O completion.
*/
-static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
char b[BDEVNAME_SIZE];
unsigned long flags;
@@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh)
set_buffer_async_read(bh);
}
-void mark_buffer_async_write(struct buffer_head *bh)
+void mark_buffer_async_write_endio(struct buffer_head *bh,
+ bh_end_io_t *handler)
{
- bh->b_end_io = end_buffer_async_write;
+ bh->b_end_io = handler;
set_buffer_async_write(bh);
}
+
+void mark_buffer_async_write(struct buffer_head *bh)
+{
+ mark_buffer_async_write_endio(bh, end_buffer_async_write);
+}
EXPORT_SYMBOL(mark_buffer_async_write);
@@ -547,7 +553,7 @@ repeat:
return err;
}
-void do_thaw_all(unsigned long unused)
+void do_thaw_all(struct work_struct *work)
{
struct super_block *sb;
char b[BDEVNAME_SIZE];
@@ -567,6 +573,7 @@ restart:
goto restart;
}
spin_unlock(&sb_lock);
+ kfree(work);
printk(KERN_WARNING "Emergency Thaw complete\n");
}
@@ -577,7 +584,13 @@ restart:
*/
void emergency_thaw_all(void)
{
- pdflush_operation(do_thaw_all, 0);
+ struct work_struct *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(work, do_thaw_all);
+ schedule_work(work);
+ }
}
/**
@@ -1596,9 +1609,20 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
* locked buffer. This only can happen if someone has written the buffer
* directly, with submit_bh(). At the address_space level PageWriteback
* prevents this contention from occurring.
+ *
+ * If block_write_full_page() is called with wbc->sync_mode ==
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
+ * causes the writes to be flagged as synchronous writes, but the
+ * block device queue will NOT be unplugged, since usually many pages
+ * will be pushed to the out before the higher-level caller actually
+ * waits for the writes to be completed. The various wait functions,
+ * such as wait_on_writeback_range() will ultimately call sync_page()
+ * which will ultimately call blk_run_backing_dev(), which will end up
+ * unplugging the device queue.
*/
static int __block_write_full_page(struct inode *inode, struct page *page,
- get_block_t *get_block, struct writeback_control *wbc)
+ get_block_t *get_block, struct writeback_control *wbc,
+ bh_end_io_t *handler)
{
int err;
sector_t block;
@@ -1606,7 +1630,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
const unsigned blocksize = 1 << inode->i_blkbits;
int nr_underway = 0;
- int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+ int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC_PLUG : WRITE);
BUG_ON(!PageLocked(page));
@@ -1682,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
continue;
}
if (test_clear_buffer_dirty(bh)) {
- mark_buffer_async_write(bh);
+ mark_buffer_async_write_endio(bh, handler);
} else {
unlock_buffer(bh);
}
@@ -1735,7 +1760,7 @@ recover:
if (buffer_mapped(bh) && buffer_dirty(bh) &&
!buffer_delay(bh)) {
lock_buffer(bh);
- mark_buffer_async_write(bh);
+ mark_buffer_async_write_endio(bh, handler);
} else {
/*
* The buffer may have been set dirty during
@@ -2661,7 +2686,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
out:
ret = mpage_writepage(page, get_block, wbc);
if (ret == -EAGAIN)
- ret = __block_write_full_page(inode, page, get_block, wbc);
+ ret = __block_write_full_page(inode, page, get_block, wbc,
+ end_buffer_async_write);
return ret;
}
EXPORT_SYMBOL(nobh_writepage);
@@ -2819,9 +2845,10 @@ out:
/*
* The generic ->writepage function for buffer-backed address_spaces
+ * this form passes in the end_io handler used to finish the IO.
*/
-int block_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
+int block_write_full_page_endio(struct page *page, get_block_t *get_block,
+ struct writeback_control *wbc, bh_end_io_t *handler)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
@@ -2830,7 +2857,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
/* Is the page fully inside i_size? */
if (page->index < end_index)
- return __block_write_full_page(inode, page, get_block, wbc);
+ return __block_write_full_page(inode, page, get_block, wbc,
+ handler);
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2853,9 +2881,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
- return __block_write_full_page(inode, page, get_block, wbc);
+ return __block_write_full_page(inode, page, get_block, wbc, handler);
+}
+
+/*
+ * The generic ->writepage function for buffer-backed address_spaces
+ */
+int block_write_full_page(struct page *page, get_block_t *get_block,
+ struct writeback_control *wbc)
+{
+ return block_write_full_page_endio(page, get_block, wbc,
+ end_buffer_async_write);
}
+
sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
get_block_t *get_block)
{
@@ -3324,9 +3363,11 @@ EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(block_write_full_page);
+EXPORT_SYMBOL(block_write_full_page_endio);
EXPORT_SYMBOL(cont_write_begin);
EXPORT_SYMBOL(end_buffer_read_sync);
EXPORT_SYMBOL(end_buffer_write_sync);
+EXPORT_SYMBOL(end_buffer_async_write);
EXPORT_SYMBOL(file_fsync);
EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_cont_expand_simple);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da258e7249c..05763bbc205 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, nr_vecs);
- if (bio == NULL)
- return -ENOMEM;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b43b9556366..acf67883110 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode,
if (depth == 0)
return (err);
-reread:
- partial = ext2_get_branch(inode, depth, offsets, chain, &err);
+ partial = ext2_get_branch(inode, depth, offsets, chain, &err);
/* Simplest case - block found, no allocation needed */
if (!partial) {
first_block = le32_to_cpu(chain[depth - 1].key);
@@ -602,15 +601,16 @@ reread:
while (count < maxblocks && count <= blocks_to_boundary) {
ext2_fsblk_t blk;
- if (!verify_chain(chain, partial)) {
+ if (!verify_chain(chain, chain + depth - 1)) {
/*
* Indirect block might be removed by
* truncate while we were reading it.
* Handling of that case: forget what we've
* got now, go to reread.
*/
+ err = -EAGAIN;
count = 0;
- goto changed;
+ break;
}
blk = le32_to_cpu(*(chain[depth-1].p + count));
if (blk == first_block + count)
@@ -618,7 +618,8 @@ reread:
else
break;
}
- goto got_it;
+ if (err != -EAGAIN)
+ goto got_it;
}
/* Next simple case - plain lookup or failed read of indirect block */
@@ -626,6 +627,33 @@ reread:
goto cleanup;
mutex_lock(&ei->truncate_mutex);
+ /*
+ * If the indirect block is missing while we are reading
+ * the chain(ext3_get_branch() returns -EAGAIN err), or
+ * if the chain has been changed after we grab the semaphore,
+ * (either because another process truncated this branch, or
+ * another get_block allocated this branch) re-grab the chain to see if
+ * the request block has been allocated or not.
+ *
+ * Since we already block the truncate/other get_block
+ * at this point, we will have the current copy of the chain when we
+ * splice the branch into the tree.
+ */
+ if (err == -EAGAIN || !verify_chain(chain, partial)) {
+ while (partial > chain) {
+ brelse(partial->bh);
+ partial--;
+ }
+ partial = ext2_get_branch(inode, depth, offsets, chain, &err);
+ if (!partial) {
+ count++;
+ mutex_unlock(&ei->truncate_mutex);
+ if (err)
+ goto cleanup;
+ clear_buffer_new(bh_result);
+ goto got_it;
+ }
+ }
/*
* Okay, we need to do block allocation. Lazily initialize the block
@@ -683,12 +711,6 @@ cleanup:
partial--;
}
return err;
-changed:
- while (partial > chain) {
- brelse(partial->bh);
- partial--;
- }
- goto reread;
}
int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 466a332e0bd..fcfa2436185 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1521,12 +1521,16 @@ static int ext3_ordered_writepage(struct page *page,
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
- } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
- /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */
- return block_write_full_page(page, NULL, wbc);
+ page_bufs = page_buffers(page);
+ } else {
+ page_bufs = page_buffers(page);
+ if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE,
+ NULL, buffer_unmapped)) {
+ /* Provide NULL get_block() to catch bugs if buffers
+ * weren't really mapped */
+ return block_write_full_page(page, NULL, wbc);
+ }
}
- page_bufs = page_buffers(page);
-
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
@@ -1581,6 +1585,15 @@ static int ext3_writeback_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ if (page_has_buffers(page)) {
+ if (!walk_page_buffers(NULL, page_buffers(page), 0,
+ PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
+ /* Provide NULL get_block() to catch bugs if buffers
+ * weren't really mapped */
+ return block_write_full_page(page, NULL, wbc);
+ }
+ }
+
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ac77d8b8251..2a1cb097976 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -342,7 +342,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
ext4_fsblk_t block = idx_pblock(ext_idx);
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
- (block > ext4_blocks_count(es))))
+ (block >= ext4_blocks_count(es))))
return 0;
else
return 1;
@@ -2416,8 +2416,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
len = ee_len;
bio = bio_alloc(GFP_NOIO, len);
- if (!bio)
- return -ENOMEM;
bio->bi_sector = ee_pblock;
bio->bi_bdev = inode->i_sb->s_bdev;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a2e7952bc5f..c6bd6ced3bb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,16 +372,16 @@ static int ext4_block_to_path(struct inode *inode,
}
static int __ext4_check_blockref(const char *function, struct inode *inode,
- unsigned int *p, unsigned int max) {
+ __le32 *p, unsigned int max) {
unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
- unsigned int *bref = p;
+ __le32 *bref = p;
while (bref < p+max) {
- if (unlikely(*bref >= maxblocks)) {
+ if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
ext4_error(inode->i_sb, function,
"block reference %u >= max (%u) "
"in inode #%lu, offset=%d",
- *bref, maxblocks,
+ le32_to_cpu(*bref), maxblocks,
inode->i_ino, (int)(bref-p));
return -EIO;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9987bba99db..2958f4e6f22 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2508,6 +2508,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext4;
+ /* check blocks count against device size */
+ blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+ if (blocks_count && ext4_blocks_count(es) > blocks_count) {
+ printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
+ "exceeds size of device (%llu blocks)\n",
+ ext4_blocks_count(es), blocks_count);
+ goto failed_mount;
+ }
+
/*
* It makes no sense for the first data block to be beyond the end
* of the filesystem.
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index d0a69ff2537..182f9ffe2b5 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -95,3 +95,6 @@ config FAT_DEFAULT_IOCHARSET
Note that "utf8" is not recommended for FAT filesystems.
If unsure, you shouldn't set "utf8" here.
See <file:Documentation/filesystems/vfat.txt> for more information.
+
+ Enable any character sets you need in File Systems/Native Language
+ Support.
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2b25133524a..06f30e96567 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -938,9 +938,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
}
static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
- unsigned *nbytesp, int write)
+ size_t *nbytesp, int write)
{
- unsigned nbytes = *nbytesp;
+ size_t nbytes = *nbytesp;
unsigned long user_addr = (unsigned long) buf;
unsigned offset = user_addr & ~PAGE_MASK;
int npages;
@@ -955,7 +955,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
return 0;
}
- nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+ nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
down_read(&current->mm->mmap_sem);
@@ -1298,6 +1298,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_flags & VM_MAYSHARE)
return -ENODEV;
+ invalidate_inode_pages2(file->f_mapping);
+
return generic_file_mmap(file, vma);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3984e47d1d3..1afd9f26bcb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -597,7 +597,6 @@ __acquires(&gl->gl_spin)
GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
- down_read(&gfs2_umount_flush_sem);
if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
gl->gl_demote_state != gl->gl_state) {
if (find_first_holder(gl))
@@ -614,15 +613,14 @@ __acquires(&gl->gl_spin)
if (ret == 0)
goto out_unlock;
if (ret == 2)
- goto out_sem;
+ goto out;
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
do_error(gl, 0); /* Fail queued try locks */
}
do_xmote(gl, gh, gl->gl_target);
-out_sem:
- up_read(&gfs2_umount_flush_sem);
+out:
return;
out_sched:
@@ -631,7 +629,7 @@ out_sched:
gfs2_glock_put(gl);
out_unlock:
clear_bit(GLF_LOCK, &gl->gl_flags);
- goto out_sem;
+ goto out;
}
static void glock_work_func(struct work_struct *work)
@@ -641,6 +639,7 @@ static void glock_work_func(struct work_struct *work)
if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags))
finish_xmote(gl, gl->gl_reply);
+ down_read(&gfs2_umount_flush_sem);
spin_lock(&gl->gl_spin);
if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
gl->gl_state != LM_ST_UNLOCKED &&
@@ -653,6 +652,7 @@ static void glock_work_func(struct work_struct *work)
}
run_queue(gl, 0);
spin_unlock(&gl->gl_spin);
+ up_read(&gfs2_umount_flush_sem);
if (!delay ||
queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7b277d44915..5a31d426116 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -137,15 +137,15 @@ void gfs2_set_iop(struct inode *inode)
if (S_ISREG(mode)) {
inode->i_op = &gfs2_file_iops;
if (gfs2_localflocks(sdp))
- inode->i_fop = gfs2_file_fops_nolock;
+ inode->i_fop = &gfs2_file_fops_nolock;
else
- inode->i_fop = gfs2_file_fops;
+ inode->i_fop = &gfs2_file_fops;
} else if (S_ISDIR(mode)) {
inode->i_op = &gfs2_dir_iops;
if (gfs2_localflocks(sdp))
- inode->i_fop = gfs2_dir_fops_nolock;
+ inode->i_fop = &gfs2_dir_fops_nolock;
else
- inode->i_fop = gfs2_dir_fops;
+ inode->i_fop = &gfs2_dir_fops;
} else if (S_ISLNK(mode)) {
inode->i_op = &gfs2_symlink_iops;
} else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index dca4fee3078..c30be2b6658 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,21 +101,23 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
extern const struct inode_operations gfs2_symlink_iops;
-extern const struct file_operations *gfs2_file_fops_nolock;
-extern const struct file_operations *gfs2_dir_fops_nolock;
+extern const struct file_operations gfs2_file_fops_nolock;
+extern const struct file_operations gfs2_dir_fops_nolock;
extern void gfs2_set_inode_flags(struct inode *inode);
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
-extern const struct file_operations *gfs2_file_fops;
-extern const struct file_operations *gfs2_dir_fops;
+extern const struct file_operations gfs2_file_fops;
+extern const struct file_operations gfs2_dir_fops;
+
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
return sdp->sd_args.ar_localflocks;
}
#else /* Single node only */
-#define gfs2_file_fops NULL
-#define gfs2_dir_fops NULL
+#define gfs2_file_fops gfs2_file_fops_nolock
+#define gfs2_dir_fops gfs2_dir_fops_nolock
+
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
return 1;
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 70b9b854894..101caf3ee86 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -705,7 +705,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
}
}
-const struct file_operations *gfs2_file_fops = &(const struct file_operations){
+const struct file_operations gfs2_file_fops = {
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -723,7 +723,7 @@ const struct file_operations *gfs2_file_fops = &(const struct file_operations){
.setlease = gfs2_setlease,
};
-const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
+const struct file_operations gfs2_dir_fops = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
@@ -735,7 +735,7 @@ const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
-const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){
+const struct file_operations gfs2_file_fops_nolock = {
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -751,7 +751,7 @@ const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operat
.setlease = generic_setlease,
};
-const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){
+const struct file_operations gfs2_dir_fops_nolock = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 51883b3ad89..650a730707b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
lock_page(page);
bio = bio_alloc(GFP_NOFS, 1);
- if (unlikely(!bio)) {
- __free_page(page);
- return -ENOBUFS;
- }
-
bio->bi_sector = sector * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio_add_page(bio, page, PAGE_SIZE, 0);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index abd5429ae28..1c70fa5168d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -371,6 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
ip = ghs[1].gh_gl->gl_object;
ip->i_disksize = size;
+ i_size_write(inode, size);
error = gfs2_meta_inode_buffer(ip, &dibh);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8d53f66b5bc..152e6c4a0dc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -81,7 +81,7 @@ struct gfs2_quota_change_host {
static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
-static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(qd_lru_lock);
int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
{
@@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data)
refrigerator();
t = min(quotad_timeo, statfs_timeo);
- prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
spin_lock(&sdp->sd_trunc_lock);
empty = list_empty(&sdp->sd_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9435dda8f1e..a1cbff2b4d9 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
BUG();
return 0;
}
+
+ if (!tree)
+ return 0;
+
if (tree->node_size >= PAGE_CACHE_SIZE) {
nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
spin_lock(&tree->hash_lock);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 36ca2e1a4fa..7b6165f25fb 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb)
if (HFS_SB(sb)->nls_disk)
unload_nls(HFS_SB(sb)->nls_disk);
+ free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
kfree(HFS_SB(sb));
sb->s_fs_info = NULL;
}
diff --git a/fs/inode.c b/fs/inode.c
index d06d6d268de..6ad14a1cd8c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode)
spin_lock(&inode_lock);
}
-/*
- * We rarely want to lock two inodes that do not have a parent/child
- * relationship (such as directory, child inode) simultaneously. The
- * vast majority of file systems should be able to get along fine
- * without this. Do not use these functions except as a last resort.
- */
-void inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
- if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
- if (inode1)
- mutex_lock(&inode1->i_mutex);
- else if (inode2)
- mutex_lock(&inode2->i_mutex);
- return;
- }
-
- if (inode1 < inode2) {
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
- }
-}
-EXPORT_SYMBOL(inode_double_lock);
-
-void inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
- if (inode1)
- mutex_unlock(&inode1->i_mutex);
-
- if (inode2 && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
-}
-EXPORT_SYMBOL(inode_double_unlock);
-
static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c7bd649bbbd..3e9afc2a91d 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -55,6 +55,25 @@
* need do nothing.
* RevokeValid set, Revoked set:
* buffer has been revoked.
+ *
+ * Locking rules:
+ * We keep two hash tables of revoke records. One hashtable belongs to the
+ * running transaction (is pointed to by journal->j_revoke), the other one
+ * belongs to the committing transaction. Accesses to the second hash table
+ * happen only from the kjournald and no other thread touches this table. Also
+ * journal_switch_revoke_table() which switches which hashtable belongs to the
+ * running and which to the committing transaction is called only from
+ * kjournald. Therefore we need no locks when accessing the hashtable belonging
+ * to the committing transaction.
+ *
+ * All users operating on the hash table belonging to the running transaction
+ * have a handle to the transaction. Therefore they are safe from kjournald
+ * switching hash tables under them. For operations on the lists of entries in
+ * the hash table j_revoke_lock is used.
+ *
+ * Finally, also replay code uses the hash tables but at this moment noone else
+ * can touch them (filesystem isn't mounted yet) and hence no locking is
+ * needed.
*/
#ifndef __KERNEL__
@@ -402,8 +421,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
* the second time we would still have a pending revoke to cancel. So,
* do not trust the Revoked bit on buffers unless RevokeValid is also
* set.
- *
- * The caller must have the journal locked.
*/
int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
{
@@ -481,10 +498,7 @@ void journal_switch_revoke_table(journal_t *journal)
/*
* Write revoke records to the journal for all entries in the current
* revoke hash, deleting the entries as we go.
- *
- * Called with the journal lock held.
*/
-
void journal_write_revoke_records(journal_t *journal,
transaction_t *transaction)
{
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 24638e059bf..064279e33bb 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
.bpop_translate = NULL,
};
+static struct lock_class_key nilfs_bmap_dat_lock_key;
+
/**
* nilfs_bmap_read - read a bmap from an inode
* @bmap: bmap
@@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
bmap->b_pops = &nilfs_bmap_ptr_ops_p;
bmap->b_last_allocated_key = 0; /* XXX: use macro */
bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
+ lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
break;
case NILFS_CPFILE_INO:
case NILFS_SUFILE_INO:
@@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
{
memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union));
init_rwsem(&gcbmap->b_sem);
+ lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
}
@@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
{
memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union));
init_rwsem(&bmap->b_sem);
+ lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
}
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 7558c977db0..3d0c18a16db 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -35,11 +35,6 @@
#include "bmap_union.h"
/*
- * NILFS filesystem version
- */
-#define NILFS_VERSION "2.0.5"
-
-/*
* nilfs inode data in memory
*/
struct nilfs_inode_info {
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 6ade0963fc1..4fc081e47d7 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
struct nilfs_segment_entry *ent, *n;
struct inode *sufile = nilfs->ns_sufile;
__u64 segnum[4];
- time_t mtime;
int err;
int i;
@@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
* Collecting segments written after the latest super root.
* These are marked dirty to avoid being reallocated in the next write.
*/
- mtime = get_seconds();
list_for_each_entry_safe(ent, n, head, list) {
- if (ent->segnum == segnum[0]) {
- list_del(&ent->list);
- nilfs_free_segment_entry(ent);
- continue;
- }
- err = nilfs_open_segment_entry(ent, sufile);
- if (unlikely(err))
- goto failed;
- if (!nilfs_segment_usage_dirty(ent->raw_su)) {
- /* make the segment garbage */
- ent->raw_su->su_nblocks = cpu_to_le32(0);
- ent->raw_su->su_lastmod = cpu_to_le32(mtime);
- nilfs_segment_usage_set_dirty(ent->raw_su);
+ if (ent->segnum != segnum[0]) {
+ err = nilfs_sufile_scrap(sufile, ent->segnum);
+ if (unlikely(err))
+ goto failed;
}
list_del(&ent->list);
- nilfs_close_segment_entry(ent, sufile);
nilfs_free_segment_entry(ent);
}
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index c774cf397e2..98e68677f04 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
create, NULL, bhp);
}
+static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
+ u64 ncleanadd, u64 ndirtyadd)
+{
+ struct nilfs_sufile_header *header;
+ void *kaddr;
+
+ kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
+ header = kaddr + bh_offset(header_bh);
+ le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
+ le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
+ kunmap_atomic(kaddr, KM_USER0);
+
+ nilfs_mdt_mark_buffer_dirty(header_bh);
+}
+
+int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
+ void (*dofunc)(struct inode *, __u64,
+ struct buffer_head *,
+ struct buffer_head *))
+{
+ struct buffer_head *header_bh, *bh;
+ int ret;
+
+ if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
+ printk(KERN_WARNING "%s: invalid segment number: %llu\n",
+ __func__, (unsigned long long)segnum);
+ return -EINVAL;
+ }
+ down_write(&NILFS_MDT(sufile)->mi_sem);
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (ret < 0)
+ goto out_sem;
+
+ ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh);
+ if (!ret) {
+ dofunc(sufile, segnum, header_bh, bh);
+ brelse(bh);
+ }
+ brelse(header_bh);
+
+ out_sem:
+ up_write(&NILFS_MDT(sufile)->mi_sem);
+ return ret;
+}
+
/**
* nilfs_sufile_alloc - allocate a segment
* @sufile: inode of segment usage file
@@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
{
struct buffer_head *header_bh, *su_bh;
- struct the_nilfs *nilfs;
struct nilfs_sufile_header *header;
struct nilfs_segment_usage *su;
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
@@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
down_write(&NILFS_MDT(sufile)->mi_sem);
- nilfs = NILFS_MDT(sufile)->mi_nilfs;
-
ret = nilfs_sufile_get_header_block(sufile, &header_bh);
if (ret < 0)
goto out_sem;
@@ -192,165 +235,84 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
return ret;
}
-/**
- * nilfs_sufile_cancel_free -
- * @sufile: inode of segment usage file
- * @segnum: segment number
- *
- * Description:
- *
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- */
-int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
+void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
+ struct buffer_head *header_bh,
+ struct buffer_head *su_bh)
{
- struct buffer_head *header_bh, *su_bh;
- struct the_nilfs *nilfs;
- struct nilfs_sufile_header *header;
struct nilfs_segment_usage *su;
void *kaddr;
- int ret;
-
- down_write(&NILFS_MDT(sufile)->mi_sem);
-
- nilfs = NILFS_MDT(sufile)->mi_nilfs;
-
- ret = nilfs_sufile_get_header_block(sufile, &header_bh);
- if (ret < 0)
- goto out_sem;
-
- ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
- if (ret < 0)
- goto out_header;
kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum, su_bh, kaddr);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
if (unlikely(!nilfs_segment_usage_clean(su))) {
printk(KERN_WARNING "%s: segment %llu must be clean\n",
__func__, (unsigned long long)segnum);
kunmap_atomic(kaddr, KM_USER0);
- goto out_su_bh;
+ return;
}
nilfs_segment_usage_set_dirty(su);
kunmap_atomic(kaddr, KM_USER0);
- kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
- header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
- le64_add_cpu(&header->sh_ncleansegs, -1);
- le64_add_cpu(&header->sh_ndirtysegs, 1);
- kunmap_atomic(kaddr, KM_USER0);
-
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ nilfs_sufile_mod_counter(header_bh, -1, 1);
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
-
- out_su_bh:
- brelse(su_bh);
- out_header:
- brelse(header_bh);
- out_sem:
- up_write(&NILFS_MDT(sufile)->mi_sem);
- return ret;
}
-/**
- * nilfs_sufile_freev - free segments
- * @sufile: inode of segment usage file
- * @segnum: array of segment numbers
- * @nsegs: number of segments
- *
- * Description: nilfs_sufile_freev() frees segments specified by @segnum and
- * @nsegs, which must have been returned by a previous call to
- * nilfs_sufile_alloc().
- *
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- */
-#define NILFS_SUFILE_FREEV_PREALLOC 16
-int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs)
+void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
+ struct buffer_head *header_bh,
+ struct buffer_head *su_bh)
{
- struct buffer_head *header_bh, **su_bh,
- *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC];
- struct the_nilfs *nilfs;
- struct nilfs_sufile_header *header;
struct nilfs_segment_usage *su;
void *kaddr;
- int ret, i;
+ int clean, dirty;
- down_write(&NILFS_MDT(sufile)->mi_sem);
-
- nilfs = NILFS_MDT(sufile)->mi_nilfs;
-
- /* prepare resources */
- if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC)
- su_bh = su_bh_prealloc;
- else {
- su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS);
- if (su_bh == NULL) {
- ret = -ENOMEM;
- goto out_sem;
- }
- }
-
- ret = nilfs_sufile_get_header_block(sufile, &header_bh);
- if (ret < 0)
- goto out_su_bh;
- for (i = 0; i < nsegs; i++) {
- ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i],
- 0, &su_bh[i]);
- if (ret < 0)
- goto out_bh;
- }
-
- /* free segments */
- for (i = 0; i < nsegs; i++) {
- kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0);
- su = nilfs_sufile_block_get_segment_usage(
- sufile, segnum[i], su_bh[i], kaddr);
- WARN_ON(nilfs_segment_usage_error(su));
- nilfs_segment_usage_set_clean(su);
+ kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
+ su->su_nblocks == cpu_to_le32(0)) {
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(su_bh[i]);
+ return;
}
- kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
- header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
- le64_add_cpu(&header->sh_ncleansegs, nsegs);
- le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs);
+ clean = nilfs_segment_usage_clean(su);
+ dirty = nilfs_segment_usage_dirty(su);
+
+ /* make the segment garbage */
+ su->su_lastmod = cpu_to_le64(0);
+ su->su_nblocks = cpu_to_le32(0);
+ su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+
+ nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
+ nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
+}
- out_bh:
- for (i--; i >= 0; i--)
- brelse(su_bh[i]);
- brelse(header_bh);
+void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
+ struct buffer_head *header_bh,
+ struct buffer_head *su_bh)
+{
+ struct nilfs_segment_usage *su;
+ void *kaddr;
+ int sudirty;
- out_su_bh:
- if (su_bh != su_bh_prealloc)
- kfree(su_bh);
+ kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
+ if (nilfs_segment_usage_clean(su)) {
+ printk(KERN_WARNING "%s: segment %llu is already clean\n",
+ __func__, (unsigned long long)segnum);
+ kunmap_atomic(kaddr, KM_USER0);
+ return;
+ }
+ WARN_ON(nilfs_segment_usage_error(su));
+ WARN_ON(!nilfs_segment_usage_dirty(su));
- out_sem:
- up_write(&NILFS_MDT(sufile)->mi_sem);
- return ret;
-}
+ sudirty = nilfs_segment_usage_dirty(su);
+ nilfs_segment_usage_set_clean(su);
+ kunmap_atomic(kaddr, KM_USER0);
+ nilfs_mdt_mark_buffer_dirty(su_bh);
-/**
- * nilfs_sufile_free -
- * @sufile:
- * @segnum:
- */
-int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
-{
- return nilfs_sufile_freev(sufile, &segnum, 1);
+ nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
+ nilfs_mdt_mark_dirty(sufile);
}
/**
@@ -500,72 +462,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
return ret;
}
-/**
- * nilfs_sufile_set_error - mark a segment as erroneous
- * @sufile: inode of segment usage file
- * @segnum: segment number
- *
- * Description: nilfs_sufile_set_error() marks the segment specified by
- * @segnum as erroneous. The error segment will never be used again.
- *
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- *
- * %-EINVAL - Invalid segment usage number.
- */
-int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
+void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
+ struct buffer_head *header_bh,
+ struct buffer_head *su_bh)
{
- struct buffer_head *header_bh, *su_bh;
struct nilfs_segment_usage *su;
- struct nilfs_sufile_header *header;
void *kaddr;
- int ret;
-
- if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
- printk(KERN_WARNING "%s: invalid segment number: %llu\n",
- __func__, (unsigned long long)segnum);
- return -EINVAL;
- }
- down_write(&NILFS_MDT(sufile)->mi_sem);
-
- ret = nilfs_sufile_get_header_block(sufile, &header_bh);
- if (ret < 0)
- goto out_sem;
- ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
- if (ret < 0)
- goto out_header;
+ int suclean;
kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
if (nilfs_segment_usage_error(su)) {
kunmap_atomic(kaddr, KM_USER0);
- brelse(su_bh);
- goto out_header;
+ return;
}
-
+ suclean = nilfs_segment_usage_clean(su);
nilfs_segment_usage_set_error(su);
kunmap_atomic(kaddr, KM_USER0);
- brelse(su_bh);
- kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
- header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
- le64_add_cpu(&header->sh_ndirtysegs, -1);
- kunmap_atomic(kaddr, KM_USER0);
- nilfs_mdt_mark_buffer_dirty(header_bh);
+ if (suclean)
+ nilfs_sufile_mod_counter(header_bh, -1, 0);
nilfs_mdt_mark_buffer_dirty(su_bh);
nilfs_mdt_mark_dirty(sufile);
- brelse(su_bh);
-
- out_header:
- brelse(header_bh);
-
- out_sem:
- up_write(&NILFS_MDT(sufile)->mi_sem);
- return ret;
}
/**
@@ -625,7 +543,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks);
si[i + j].sui_flags = le32_to_cpu(su->su_flags) &
~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
- if (nilfs_segment_is_active(nilfs, segnum + i + j))
+ if (nilfs_segment_is_active(nilfs, segnum + j))
si[i + j].sui_flags |=
(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
}
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index d595f33a768..a2e2efd4ade 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
}
int nilfs_sufile_alloc(struct inode *, __u64 *);
-int nilfs_sufile_cancel_free(struct inode *, __u64);
-int nilfs_sufile_freev(struct inode *, __u64 *, size_t);
-int nilfs_sufile_free(struct inode *, __u64);
int nilfs_sufile_get_segment_usage(struct inode *, __u64,
struct nilfs_segment_usage **,
struct buffer_head **);
@@ -46,9 +43,83 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64,
struct buffer_head *);
int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
-int nilfs_sufile_set_error(struct inode *, __u64);
ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *,
size_t);
+int nilfs_sufile_update(struct inode *, __u64, int,
+ void (*dofunc)(struct inode *, __u64,
+ struct buffer_head *,
+ struct buffer_head *));
+void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
+ struct buffer_head *);
+void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *,
+ struct buffer_head *);
+void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *,
+ struct buffer_head *);
+void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
+ struct buffer_head *);
+
+/**
+ * nilfs_sufile_cancel_free -
+ * @sufile: inode of segment usage file
+ * @segnum: segment number
+ *
+ * Description:
+ *
+ * Return Value: On success, 0 is returned. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ */
+static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
+{
+ return nilfs_sufile_update(sufile, segnum, 0,
+ nilfs_sufile_do_cancel_free);
+}
+
+/**
+ * nilfs_sufile_scrap - make a segment garbage
+ * @sufile: inode of segment usage file
+ * @segnum: segment number to be freed
+ */
+static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
+{
+ return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap);
+}
+
+/**
+ * nilfs_sufile_free - free segment
+ * @sufile: inode of segment usage file
+ * @segnum: segment number to be freed
+ */
+static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
+{
+ return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free);
+}
+
+/**
+ * nilfs_sufile_set_error - mark a segment as erroneous
+ * @sufile: inode of segment usage file
+ * @segnum: segment number
+ *
+ * Description: nilfs_sufile_set_error() marks the segment specified by
+ * @segnum as erroneous. The error segment will never be used again.
+ *
+ * Return Value: On success, 0 is returned. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EINVAL - Invalid segment usage number.
+ */
+static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
+{
+ return nilfs_sufile_update(sufile, segnum, 0,
+ nilfs_sufile_do_set_error);
+}
#endif /* _NILFS_SUFILE_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index e117e1ea9bf..6989b03e97a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -63,7 +63,6 @@
MODULE_AUTHOR("NTT Corp.");
MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
"(NILFS)");
-MODULE_VERSION(NILFS_VERSION);
MODULE_LICENSE("GPL");
static int nilfs_remount(struct super_block *sb, int *flags, char *data);
@@ -476,11 +475,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct nilfs_sb_info *sbi = NILFS_SB(sb);
+ struct the_nilfs *nilfs = sbi->s_nilfs;
+ u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
unsigned long long blocks;
unsigned long overhead;
unsigned long nrsvblocks;
sector_t nfreeblocks;
- struct the_nilfs *nilfs = sbi->s_nilfs;
int err;
/*
@@ -514,6 +514,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = atomic_read(&sbi->s_inodes_count);
buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
buf->f_namelen = NILFS_NAME_LEN;
+ buf->f_fsid.val[0] = (u32)id;
+ buf->f_fsid.val[1] = (u32)(id >> 32);
+
return 0;
}
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 33400cf0bbe..7f65b3be4aa 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs)
static int nilfs_load_super_root(struct the_nilfs *nilfs,
struct nilfs_sb_info *sbi, sector_t sr_block)
{
+ static struct lock_class_key dat_lock_key;
struct buffer_head *bh_sr;
struct nilfs_super_root *raw_sr;
struct nilfs_super_block **sbp = nilfs->ns_sbp;
@@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
if (unlikely(err))
goto failed_sufile;
+ lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key);
+ lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key);
+
nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
sizeof(struct nilfs_cpfile_header));
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8672b953603..c2a87c885b7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1912,6 +1912,22 @@ out_sems:
return written ? written : ret;
}
+static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
+ struct file *out,
+ struct splice_desc *sd)
+{
+ int ret;
+
+ ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
+ sd->total_len, 0, NULL);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ return splice_from_pipe_feed(pipe, sd, pipe_to_file);
+}
+
static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out,
loff_t *ppos,
@@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
unsigned int flags)
{
int ret;
- struct inode *inode = out->f_path.dentry->d_inode;
+ struct address_space *mapping = out->f_mapping;
+ struct inode *inode = mapping->host;
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.file = out,
+ };
mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
(unsigned int)len,
out->f_path.dentry->d_name.len,
out->f_path.dentry->d_name.name);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ if (pipe->inode)
+ mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
- ret = ocfs2_rw_lock(inode, 1);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
+ splice_from_pipe_begin(&sd);
+ do {
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
- ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
- NULL);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_unlock;
- }
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ ret = ocfs2_rw_lock(inode, 1);
+ if (ret < 0)
+ mlog_errno(ret);
+ else {
+ ret = ocfs2_splice_to_file(pipe, out, &sd);
+ ocfs2_rw_unlock(inode, 1);
+ }
+ mutex_unlock(&inode->i_mutex);
+ } while (ret > 0);
+ splice_from_pipe_end(pipe, &sd);
if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
- ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
- if (pipe->inode)
mutex_unlock(&pipe->inode->i_mutex);
-out_unlock:
- ocfs2_rw_unlock(inode, 1);
-out:
- mutex_unlock(&inode->i_mutex);
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
+
+ if (ret > 0) {
+ unsigned long nr_pages;
+
+ *ppos += ret;
+ nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ /*
+ * If file or inode is SYNC and we actually wrote some data,
+ * sync it.
+ */
+ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+ int err;
+
+ mutex_lock(&inode->i_mutex);
+ err = ocfs2_rw_lock(inode, 1);
+ if (err < 0) {
+ mlog_errno(err);
+ } else {
+ err = generic_osync_inode(inode, mapping,
+ OSYNC_METADATA|OSYNC_DATA);
+ ocfs2_rw_unlock(inode, 1);
+ }
+ mutex_unlock(&inode->i_mutex);
+
+ if (err)
+ ret = err;
+ }
+ balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
+ }
mlog_exit(ret);
return ret;
diff --git a/fs/pipe.c b/fs/pipe.c
index 4af7aa52181..13414ec45b8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -37,6 +37,42 @@
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
+static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+{
+ if (pipe->inode)
+ mutex_lock_nested(&pipe->inode->i_mutex, subclass);
+}
+
+void pipe_lock(struct pipe_inode_info *pipe)
+{
+ /*
+ * pipe_lock() nests non-pipe inode locks (for writing to a file)
+ */
+ pipe_lock_nested(pipe, I_MUTEX_PARENT);
+}
+EXPORT_SYMBOL(pipe_lock);
+
+void pipe_unlock(struct pipe_inode_info *pipe)
+{
+ if (pipe->inode)
+ mutex_unlock(&pipe->inode->i_mutex);
+}
+EXPORT_SYMBOL(pipe_unlock);
+
+void pipe_double_lock(struct pipe_inode_info *pipe1,
+ struct pipe_inode_info *pipe2)
+{
+ BUG_ON(pipe1 == pipe2);
+
+ if (pipe1 < pipe2) {
+ pipe_lock_nested(pipe1, I_MUTEX_PARENT);
+ pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+ } else {
+ pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+ pipe_lock_nested(pipe1, I_MUTEX_PARENT);
+ }
+}
+
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
@@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
* is considered a noninteractive wait:
*/
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
schedule();
finish_wait(&pipe->wait, &wait);
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
}
static int
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 12c20377772..64a72e2e765 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -135,7 +135,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
- pgoff = (loff_t)vma->pg_off << PAGE_SHIFT;
+ pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
}
seq_printf(m,
diff --git a/fs/splice.c b/fs/splice.c
index c18aa7e03e2..666953d59a3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
do_wakeup = 0;
page_nr = 0;
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
for (;;) {
if (!pipe->readers) {
@@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
pipe->waiting_writers--;
}
- if (pipe->inode) {
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
while (page_nr < spd_pages)
@@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
* SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
* a new page in the output file page cache and fill/dirty that.
*/
-static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
- struct splice_desc *sd)
+int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
{
struct file *file = sd->u.file;
struct address_space *mapping = file->f_mapping;
@@ -600,108 +597,177 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
out:
return ret;
}
+EXPORT_SYMBOL(pipe_to_file);
+
+static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
+{
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
+}
/**
- * __splice_from_pipe - splice data from a pipe to given actor
+ * splice_from_pipe_feed - feed available data from a pipe to a file
* @pipe: pipe to splice from
* @sd: information to @actor
* @actor: handler that splices the data
*
* Description:
- * This function does little more than loop over the pipe and call
- * @actor to do the actual moving of a single struct pipe_buffer to
- * the desired destination. See pipe_to_file, pipe_to_sendpage, or
- * pipe_to_user.
+ * This function loops over the pipe and calls @actor to do the
+ * actual moving of a single struct pipe_buffer to the desired
+ * destination. It returns when there's no more buffers left in
+ * the pipe or if the requested number of bytes (@sd->total_len)
+ * have been copied. It returns a positive number (one) if the
+ * pipe needs to be filled with more data, zero if the required
+ * number of bytes have been copied and -errno on error.
*
+ * This, together with splice_from_pipe_{begin,end,next}, may be
+ * used to implement the functionality of __splice_from_pipe() when
+ * locking is required around copying the pipe buffers to the
+ * destination.
*/
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
- splice_actor *actor)
+int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
{
- int ret, do_wakeup, err;
-
- ret = 0;
- do_wakeup = 0;
-
- for (;;) {
- if (pipe->nrbufs) {
- struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
- const struct pipe_buf_operations *ops = buf->ops;
+ int ret;
- sd->len = buf->len;
- if (sd->len > sd->total_len)
- sd->len = sd->total_len;
+ while (pipe->nrbufs) {
+ struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+ const struct pipe_buf_operations *ops = buf->ops;
- err = actor(pipe, buf, sd);
- if (err <= 0) {
- if (!ret && err != -ENODATA)
- ret = err;
+ sd->len = buf->len;
+ if (sd->len > sd->total_len)
+ sd->len = sd->total_len;
- break;
- }
+ ret = actor(pipe, buf, sd);
+ if (ret <= 0) {
+ if (ret == -ENODATA)
+ ret = 0;
+ return ret;
+ }
+ buf->offset += ret;
+ buf->len -= ret;
- ret += err;
- buf->offset += err;
- buf->len -= err;
+ sd->num_spliced += ret;
+ sd->len -= ret;
+ sd->pos += ret;
+ sd->total_len -= ret;
- sd->len -= err;
- sd->pos += err;
- sd->total_len -= err;
- if (sd->len)
- continue;
+ if (!buf->len) {
+ buf->ops = NULL;
+ ops->release(pipe, buf);
+ pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
+ pipe->nrbufs--;
+ if (pipe->inode)
+ sd->need_wakeup = true;
+ }
- if (!buf->len) {
- buf->ops = NULL;
- ops->release(pipe, buf);
- pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
- pipe->nrbufs--;
- if (pipe->inode)
- do_wakeup = 1;
- }
+ if (!sd->total_len)
+ return 0;
+ }
- if (!sd->total_len)
- break;
- }
+ return 1;
+}
+EXPORT_SYMBOL(splice_from_pipe_feed);
- if (pipe->nrbufs)
- continue;
+/**
+ * splice_from_pipe_next - wait for some data to splice from
+ * @pipe: pipe to splice from
+ * @sd: information about the splice operation
+ *
+ * Description:
+ * This function will wait for some data and return a positive
+ * value (one) if pipe buffers are available. It will return zero
+ * or -errno if no more data needs to be spliced.
+ */
+int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+ while (!pipe->nrbufs) {
if (!pipe->writers)
- break;
- if (!pipe->waiting_writers) {
- if (ret)
- break;
- }
+ return 0;
- if (sd->flags & SPLICE_F_NONBLOCK) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
+ if (!pipe->waiting_writers && sd->num_spliced)
+ return 0;
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
- break;
- }
+ if (sd->flags & SPLICE_F_NONBLOCK)
+ return -EAGAIN;
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- do_wakeup = 0;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (sd->need_wakeup) {
+ wakeup_pipe_writers(pipe);
+ sd->need_wakeup = false;
}
pipe_wait(pipe);
}
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
+ return 1;
+}
+EXPORT_SYMBOL(splice_from_pipe_next);
- return ret;
+/**
+ * splice_from_pipe_begin - start splicing from pipe
+ * @sd: information about the splice operation
+ *
+ * Description:
+ * This function should be called before a loop containing
+ * splice_from_pipe_next() and splice_from_pipe_feed() to
+ * initialize the necessary fields of @sd.
+ */
+void splice_from_pipe_begin(struct splice_desc *sd)
+{
+ sd->num_spliced = 0;
+ sd->need_wakeup = false;
+}
+EXPORT_SYMBOL(splice_from_pipe_begin);
+
+/**
+ * splice_from_pipe_end - finish splicing from pipe
+ * @pipe: pipe to splice from
+ * @sd: information about the splice operation
+ *
+ * Description:
+ * This function will wake up pipe writers if necessary. It should
+ * be called after a loop containing splice_from_pipe_next() and
+ * splice_from_pipe_feed().
+ */
+void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+ if (sd->need_wakeup)
+ wakeup_pipe_writers(pipe);
+}
+EXPORT_SYMBOL(splice_from_pipe_end);
+
+/**
+ * __splice_from_pipe - splice data from a pipe to given actor
+ * @pipe: pipe to splice from
+ * @sd: information to @actor
+ * @actor: handler that splices the data
+ *
+ * Description:
+ * This function does little more than loop over the pipe and call
+ * @actor to do the actual moving of a single struct pipe_buffer to
+ * the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ * pipe_to_user.
+ *
+ */
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
+{
+ int ret;
+
+ splice_from_pipe_begin(sd);
+ do {
+ ret = splice_from_pipe_next(pipe, sd);
+ if (ret > 0)
+ ret = splice_from_pipe_feed(pipe, sd, actor);
+ } while (ret > 0);
+ splice_from_pipe_end(pipe, sd);
+
+ return sd->num_spliced ? sd->num_spliced : ret;
}
EXPORT_SYMBOL(__splice_from_pipe);
@@ -715,7 +781,7 @@ EXPORT_SYMBOL(__splice_from_pipe);
* @actor: handler that splices the data
*
* Description:
- * See __splice_from_pipe. This function locks the input and output inodes,
+ * See __splice_from_pipe. This function locks the pipe inode,
* otherwise it's identical to __splice_from_pipe().
*
*/
@@ -724,7 +790,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
splice_actor *actor)
{
ssize_t ret;
- struct inode *inode = out->f_mapping->host;
struct splice_desc sd = {
.total_len = len,
.flags = flags,
@@ -732,30 +797,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
.u.file = out,
};
- /*
- * The actor worker might be calling ->write_begin and
- * ->write_end. Most of the time, these expect i_mutex to
- * be held. Since this may result in an ABBA deadlock with
- * pipe->inode, we have to order lock acquiry here.
- *
- * Outer lock must be inode->i_mutex, as pipe_wait() will
- * release and reacquire pipe->inode->i_mutex, AND inode must
- * never be a pipe.
- */
- WARN_ON(S_ISFIFO(inode->i_mode));
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
+ pipe_lock(pipe);
ret = __splice_from_pipe(pipe, &sd, actor);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
- mutex_unlock(&inode->i_mutex);
+ pipe_unlock(pipe);
return ret;
}
/**
- * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
+ * generic_file_splice_write - splice data from a pipe to a file
* @pipe: pipe info
* @out: file to write to
* @ppos: position in @out
@@ -764,13 +814,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
*
* Description:
* Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file. The caller is responsible
- * for acquiring i_mutex on both inodes.
+ * the given pipe inode to the given file.
*
*/
ssize_t
-generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
+generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
{
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
@@ -781,76 +830,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
.u.file = out,
};
ssize_t ret;
- int err;
-
- err = file_remove_suid(out);
- if (unlikely(err))
- return err;
-
- ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
- if (ret > 0) {
- unsigned long nr_pages;
- *ppos += ret;
- nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
- /*
- * If file or inode is SYNC and we actually wrote some data,
- * sync it.
- */
- if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
- err = generic_osync_inode(inode, mapping,
- OSYNC_METADATA|OSYNC_DATA);
+ pipe_lock(pipe);
- if (err)
- ret = err;
- }
- balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
- }
+ splice_from_pipe_begin(&sd);
+ do {
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
- return ret;
-}
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ ret = file_remove_suid(out);
+ if (!ret)
+ ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
+ mutex_unlock(&inode->i_mutex);
+ } while (ret > 0);
+ splice_from_pipe_end(pipe, &sd);
-EXPORT_SYMBOL(generic_file_splice_write_nolock);
+ pipe_unlock(pipe);
-/**
- * generic_file_splice_write - splice data from a pipe to a file
- * @pipe: pipe info
- * @out: file to write to
- * @ppos: position in @out
- * @len: number of bytes to splice
- * @flags: splice modifier flags
- *
- * Description:
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file.
- *
- */
-ssize_t
-generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
-{
- struct address_space *mapping = out->f_mapping;
- struct inode *inode = mapping->host;
- struct splice_desc sd = {
- .total_len = len,
- .flags = flags,
- .pos = *ppos,
- .u.file = out,
- };
- ssize_t ret;
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
- WARN_ON(S_ISFIFO(inode->i_mode));
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- ret = file_remove_suid(out);
- if (likely(!ret)) {
- if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
- ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
- }
- mutex_unlock(&inode->i_mutex);
if (ret > 0) {
unsigned long nr_pages;
@@ -1339,8 +1340,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
if (!pipe)
return -EBADF;
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
error = ret = 0;
while (nr_segs) {
@@ -1395,8 +1395,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
iov++;
}
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
if (!ret)
ret = error;
@@ -1524,7 +1523,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
return 0;
ret = 0;
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
while (!pipe->nrbufs) {
if (signal_pending(current)) {
@@ -1542,7 +1541,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
pipe_wait(pipe);
}
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
return ret;
}
@@ -1562,7 +1561,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
return 0;
ret = 0;
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
while (pipe->nrbufs >= PIPE_BUFFERS) {
if (!pipe->readers) {
@@ -1583,7 +1582,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
pipe->waiting_writers--;
}
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
return ret;
}
@@ -1599,10 +1598,10 @@ static int link_pipe(struct pipe_inode_info *ipipe,
/*
* Potential ABBA deadlock, work around it by ordering lock
- * grabbing by inode address. Otherwise two different processes
+ * grabbing by pipe info address. Otherwise two different processes
* could deadlock (one doing tee from A -> B, the other from B -> A).
*/
- inode_double_lock(ipipe->inode, opipe->inode);
+ pipe_double_lock(ipipe, opipe);
do {
if (!opipe->readers) {
@@ -1653,7 +1652,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
ret = -EAGAIN;
- inode_double_unlock(ipipe->inode, opipe->inode);
+ pipe_unlock(ipipe);
+ pipe_unlock(opipe);
/*
* If we put data in the output pipe, wakeup any potential readers.
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c13f67300fe..7ec89fc05b2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -153,23 +153,6 @@ xfs_find_bdev_for_inode(
}
/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
- xfs_ioend_t *ioend,
- int wait)
-{
- if (atomic_dec_and_test(&ioend->io_remaining)) {
- queue_work(xfsdatad_workqueue, &ioend->io_work);
- if (wait)
- flush_workqueue(xfsdatad_workqueue);
- }
-}
-
-/*
* We're now finished for good with this ioend structure.
* Update the page state via the associated buffer_heads,
* release holds on the inode and bio, and finally free
@@ -310,6 +293,27 @@ xfs_end_bio_read(
}
/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+ xfs_ioend_t *ioend,
+ int wait)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining)) {
+ struct workqueue_struct *wq = xfsdatad_workqueue;
+ if (ioend->io_work.func == xfs_end_bio_unwritten)
+ wq = xfsconvertd_workqueue;
+
+ queue_work(wq, &ioend->io_work);
+ if (wait)
+ flush_workqueue(wq);
+ }
+}
+
+/*
* Allocate and initialise an IO completion structure.
* We need to track unwritten extent write completion here initially.
* We'll need to extend this for updating the ondisk inode size later
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 1dd52884975..221b3e66cee 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -19,6 +19,7 @@
#define __XFS_AOPS_H__
extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1016bb913..e28800a9f2b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = {
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
#ifdef XFS_BUF_TRACE
void
@@ -1775,6 +1776,7 @@ xfs_flush_buftarg(
xfs_buf_t *bp, *n;
int pincount = 0;
+ xfs_buf_runall_queues(xfsconvertd_workqueue);
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
@@ -1831,9 +1833,15 @@ xfs_buf_init(void)
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
+ xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+ if (!xfsconvertd_workqueue)
+ goto out_destroy_xfsdatad_workqueue;
+
register_shrinker(&xfs_buf_shake);
return 0;
+ out_destroy_xfsdatad_workqueue:
+ destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
destroy_workqueue(xfslogd_workqueue);
out_free_buf_zone:
@@ -1849,6 +1857,7 @@ void
xfs_buf_terminate(void)
{
unregister_shrinker(&xfs_buf_shake);
+ destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 5aeb7777696..08be36d7326 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -74,14 +74,14 @@ xfs_flush_pages(
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = filemap_fdatawrite(mapping);
- if (flags & XFS_B_ASYNC)
- return -ret;
- ret2 = filemap_fdatawait(mapping);
- if (!ret)
- ret = ret2;
+ ret = -filemap_fdatawrite(mapping);
}
- return -ret;
+ if (flags & XFS_B_ASYNC)
+ return ret;
+ ret2 = xfs_wait_on_pages(ip, first, last);
+ if (!ret)
+ ret = ret2;
+ return ret;
}
int
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7e90daa0d1d..9142192ccbe 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -751,10 +751,26 @@ start:
goto relock;
}
} else {
+ int enospc = 0;
+ ssize_t ret2 = 0;
+
+write_retry:
xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
*offset, ioflags);
- ret = generic_file_buffered_write(iocb, iovp, segs,
+ ret2 = generic_file_buffered_write(iocb, iovp, segs,
pos, offset, count, ret);
+ /*
+ * if we just got an ENOSPC, flush the inode now we
+ * aren't holding any page locks and retry *once*
+ */
+ if (ret2 == -ENOSPC && !enospc) {
+ error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
+ if (error)
+ goto out_unlock_internal;
+ enospc = 1;
+ goto write_retry;
+ }
+ ret = ret2;
}
current->backing_dev_info = NULL;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a608e72fa40..f7ba76633c2 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -62,12 +62,6 @@ xfs_sync_inodes_ag(
uint32_t first_index = 0;
int error = 0;
int last_error = 0;
- int fflag = XFS_B_ASYNC;
-
- if (flags & SYNC_DELWRI)
- fflag = XFS_B_DELWRI;
- if (flags & SYNC_WAIT)
- fflag = 0; /* synchronous overrides all */
do {
struct inode *inode;
@@ -128,11 +122,23 @@ xfs_sync_inodes_ag(
* If we have to flush data or wait for I/O completion
* we need to hold the iolock.
*/
- if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- lock_flags |= XFS_IOLOCK_SHARED;
- error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
- if (flags & SYNC_IOWAIT)
+ if (flags & SYNC_DELWRI) {
+ if (VN_DIRTY(inode)) {
+ if (flags & SYNC_TRYLOCK) {
+ if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
+ lock_flags |= XFS_IOLOCK_SHARED;
+ } else {
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ lock_flags |= XFS_IOLOCK_SHARED;
+ }
+ if (lock_flags & XFS_IOLOCK_SHARED) {
+ error = xfs_flush_pages(ip, 0, -1,
+ (flags & SYNC_WAIT) ? 0
+ : XFS_B_ASYNC,
+ FI_NONE);
+ }
+ }
+ if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
xfs_ioend_wait(ip);
}
xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -398,15 +404,17 @@ STATIC void
xfs_syncd_queue_work(
struct xfs_mount *mp,
void *data,
- void (*syncer)(struct xfs_mount *, void *))
+ void (*syncer)(struct xfs_mount *, void *),
+ struct completion *completion)
{
- struct bhv_vfs_sync_work *work;
+ struct xfs_sync_work *work;
- work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
+ work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
INIT_LIST_HEAD(&work->w_list);
work->w_syncer = syncer;
work->w_data = data;
work->w_mount = mp;
+ work->w_completion = completion;
spin_lock(&mp->m_sync_lock);
list_add_tail(&work->w_list, &mp->m_sync_list);
spin_unlock(&mp->m_sync_lock);
@@ -420,49 +428,26 @@ xfs_syncd_queue_work(
* heads, looking about for more room...
*/
STATIC void
-xfs_flush_inode_work(
- struct xfs_mount *mp,
- void *arg)
-{
- struct inode *inode = arg;
- filemap_flush(inode->i_mapping);
- iput(inode);
-}
-
-void
-xfs_flush_inode(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- igrab(inode);
- xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
- delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
+xfs_flush_inodes_work(
struct xfs_mount *mp,
void *arg)
{
struct inode *inode = arg;
- sync_blockdev(mp->m_super->s_bdev);
+ xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
+ xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
iput(inode);
}
void
-xfs_flush_device(
+xfs_flush_inodes(
xfs_inode_t *ip)
{
struct inode *inode = VFS_I(ip);
+ DECLARE_COMPLETION_ONSTACK(completion);
igrab(inode);
- xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
- delay(msecs_to_jiffies(500));
+ xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
+ wait_for_completion(&completion);
xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
@@ -497,7 +482,7 @@ xfssyncd(
{
struct xfs_mount *mp = arg;
long timeleft;
- bhv_vfs_sync_work_t *work, *n;
+ xfs_sync_work_t *work, *n;
LIST_HEAD (tmp);
set_freezable();
@@ -532,6 +517,8 @@ xfssyncd(
list_del(&work->w_list);
if (work == &mp->m_sync_work)
continue;
+ if (work->w_completion)
+ complete(work->w_completion);
kmem_free(work);
}
}
@@ -545,6 +532,7 @@ xfs_syncd_init(
{
mp->m_sync_work.w_syncer = xfs_sync_worker;
mp->m_sync_work.w_mount = mp;
+ mp->m_sync_work.w_completion = NULL;
mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
if (IS_ERR(mp->m_sync_task))
return -PTR_ERR(mp->m_sync_task);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 04f058c848a..308d5bf6dfb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,18 +21,20 @@
struct xfs_mount;
struct xfs_perag;
-typedef struct bhv_vfs_sync_work {
+typedef struct xfs_sync_work {
struct list_head w_list;
struct xfs_mount *w_mount;
void *w_data; /* syncer routine argument */
void (*w_syncer)(struct xfs_mount *, void *);
-} bhv_vfs_sync_work_t;
+ struct completion *w_completion;
+} xfs_sync_work_t;
#define SYNC_ATTR 0x0001 /* sync attributes */
#define SYNC_DELWRI 0x0002 /* look at delayed writes */
#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
+#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
@@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
int xfs_quiesce_data(struct xfs_mount *mp);
void xfs_quiesce_attr(struct xfs_mount *mp);
-void xfs_flush_inode(struct xfs_inode *ip);
-void xfs_flush_device(struct xfs_inode *ip);
+void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 478e587087f..89b81eedce6 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -69,15 +69,6 @@ xfs_inode_alloc(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
- /*
- * initialise the VFS inode here to get failures
- * out of the way early.
- */
- if (!inode_init_always(mp->m_super, VFS_I(ip))) {
- kmem_zone_free(xfs_inode_zone, ip);
- return NULL;
- }
-
/* initialise the xfs inode */
ip->i_ino = ino;
ip->i_mount = mp;
@@ -113,6 +104,20 @@ xfs_inode_alloc(
#ifdef XFS_DIR2_TRACE
ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif
+ /*
+ * Now initialise the VFS inode. We do this after the xfs_inode
+ * initialisation as internal failures will result in ->destroy_inode
+ * being called and that will pass down through the reclaim path and
+ * free the XFS inode. This path requires the XFS inode to already be
+ * initialised. Hence if this call fails, the xfs_inode has already
+ * been freed and we should not reference it at all in the error
+ * handling.
+ */
+ if (!inode_init_always(mp->m_super, VFS_I(ip)))
+ return NULL;
+
+ /* prevent anyone from using this yet */
+ VFS_I(ip)->i_state = I_NEW|I_LOCK;
return ip;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 08ce72316bf..5aaa2d7ec15 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb(
}
STATIC int
-xfs_flush_space(
- xfs_inode_t *ip,
- int *fsynced,
- int *ioflags)
-{
- switch (*fsynced) {
- case 0:
- if (ip->i_delayed_blks) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_flush_inode(ip);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- *fsynced = 1;
- } else {
- *ioflags |= BMAPI_SYNC;
- *fsynced = 2;
- }
- return 0;
- case 1:
- *fsynced = 2;
- *ioflags |= BMAPI_SYNC;
- return 0;
- case 2:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_flush_device(ip);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- *fsynced = 3;
- return 0;
- }
- return 1;
-}
-
-STATIC int
xfs_cmn_err_fsblock_zero(
xfs_inode_t *ip,
xfs_bmbt_irec_t *imap)
@@ -538,15 +506,9 @@ error_out:
}
/*
- * If the caller is doing a write at the end of the file,
- * then extend the allocation out to the file system's write
- * iosize. We clean up any extra space left over when the
- * file is closed in xfs_inactive().
- *
- * For sync writes, we are flushing delayed allocate space to
- * try to make additional space available for allocation near
- * the filesystem full boundary - preallocation hurts in that
- * situation, of course.
+ * If the caller is doing a write at the end of the file, then extend the
+ * allocation out to the file system's write iosize. We clean up any extra
+ * space left over when the file is closed in xfs_inactive().
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate(
int n, error, imaps;
*prealloc = 0;
- if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size)
+ if ((offset + count) <= ip->i_size)
return 0;
/*
@@ -611,7 +573,7 @@ xfs_iomap_write_delay(
xfs_extlen_t extsz;
int nimaps;
xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
- int prealloc, fsynced = 0;
+ int prealloc, flushed = 0;
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -627,12 +589,12 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
-retry:
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
return error;
+retry:
if (prealloc) {
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
@@ -659,15 +621,22 @@ retry:
/*
* If bmapi returned us nothing, and if we didn't get back EDQUOT,
- * then we must have run out of space - flush delalloc, and retry..
+ * then we must have run out of space - flush all other inodes with
+ * delalloc blocks and retry without EOF preallocation.
*/
if (nimaps == 0) {
xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
ip, offset, count);
- if (xfs_flush_space(ip, &fsynced, &ioflag))
+ if (flushed)
return XFS_ERROR(ENOSPC);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_flush_inodes(ip);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ flushed = 1;
error = 0;
+ prealloc = 0;
goto retry;
}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index a1cc1322fc0..fdcf7b82747 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -40,8 +40,7 @@ typedef enum {
BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */
BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */
BMAPI_MMAP = (1 << 6), /* allocate for mmap write */
- BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */
- BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */
+ BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
} bmapi_flags_t;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f76c6d7cea2..3750f04ede0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -562,9 +562,8 @@ xfs_log_mount(
}
mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
- if (!mp->m_log) {
- cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!");
- error = ENOMEM;
+ if (IS_ERR(mp->m_log)) {
+ error = -PTR_ERR(mp->m_log);
goto out;
}
@@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp,
xfs_buf_t *bp;
int i;
int iclogsize;
+ int error = ENOMEM;
log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
- if (!log)
- return NULL;
+ if (!log) {
+ xlog_warn("XFS: Log allocation failed: No memory!");
+ goto out;
+ }
log->l_mp = mp;
log->l_targ = log_target;
@@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_grant_reserve_cycle = 1;
log->l_grant_write_cycle = 1;
+ error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
- ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
+ if (log->l_sectbb_log < 0 ||
+ log->l_sectbb_log > mp->m_sectbb_log) {
+ xlog_warn("XFS: Log sector size (0x%x) out of range.",
+ log->l_sectbb_log);
+ goto out_free_log;
+ }
+
/* for larger sector sizes, must have v2 or external log */
- ASSERT(log->l_sectbb_log == 0 ||
- log->l_logBBstart == 0 ||
- xfs_sb_version_haslogv2(&mp->m_sb));
- ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
+ if (log->l_sectbb_log != 0 &&
+ (log->l_logBBstart != 0 &&
+ !xfs_sb_version_haslogv2(&mp->m_sb))) {
+ xlog_warn("XFS: log sector size (0x%x) invalid "
+ "for configuration.", log->l_sectbb_log);
+ goto out_free_log;
+ }
+ if (mp->m_sb.sb_logsectlog < BBSHIFT) {
+ xlog_warn("XFS: Log sector log (0x%x) too small.",
+ mp->m_sb.sb_logsectlog);
+ goto out_free_log;
+ }
}
log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
xlog_get_iclog_buffer_size(mp, log);
+ error = ENOMEM;
bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
if (!bp)
goto out_free_log;
@@ -1313,7 +1331,8 @@ out_free_iclog:
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
- return NULL;
+out:
+ return ERR_PTR(-error);
} /* xlog_alloc_log */
@@ -2541,18 +2560,19 @@ redo:
xlog_ins_ticketq(&log->l_reserve_headq, tic);
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
+ spin_unlock(&log->l_grant_lock);
+ xlog_grant_push_ail(log->l_mp, need_bytes);
+ spin_lock(&log->l_grant_lock);
+
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
- if (XLOG_FORCED_SHUTDOWN(log)) {
- spin_lock(&log->l_grant_lock);
+ spin_lock(&log->l_grant_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- }
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: wake 2");
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
goto redo;
} else if (tic->t_flags & XLOG_TIC_IN_Q)
xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log,
* for more free space, otherwise try to get some space for
* this transaction.
*/
-
+ need_bytes = tic->t_unit_res;
if ((ntic = log->l_write_headq)) {
free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
log->l_grant_write_bytes);
@@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
+ spin_unlock(&log->l_grant_lock);
+ xlog_grant_push_ail(log->l_mp, need_bytes);
+ spin_lock(&log->l_grant_lock);
+
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_wait, PINOD|PLTWAIT,
&log->l_grant_lock, s);
/* If we're shutting down, this tic is already
* off the queue */
- if (XLOG_FORCED_SHUTDOWN(log)) {
- spin_lock(&log->l_grant_lock);
+ spin_lock(&log->l_grant_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- }
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: wake 1");
- xlog_grant_push_ail(log->l_mp, tic->t_unit_res);
- spin_lock(&log->l_grant_lock);
}
}
- need_bytes = tic->t_unit_res;
-
redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
@@ -2680,19 +2699,20 @@ redo:
if (free_bytes < need_bytes) {
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
xlog_ins_ticketq(&log->l_write_headq, tic);
+ spin_unlock(&log->l_grant_lock);
+ xlog_grant_push_ail(log->l_mp, need_bytes);
+ spin_lock(&log->l_grant_lock);
+
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/* If we're shutting down, this tic is already off the queue */
- if (XLOG_FORCED_SHUTDOWN(log)) {
- spin_lock(&log->l_grant_lock);
+ spin_lock(&log->l_grant_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- }
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: wake 2");
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
goto redo;
} else if (tic->t_flags & XLOG_TIC_IN_Q)
xlog_del_ticketq(&log->l_write_headq, tic);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7af44adffc8..d6a64392f98 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -313,7 +313,7 @@ typedef struct xfs_mount {
#endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct task_struct *m_sync_task; /* generalised sync thread */
- bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
+ xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
struct list_head m_sync_list; /* sync thread work item list */
spinlock_t m_sync_lock; /* work item list lock */
int m_sync_seq; /* sync thread generation no. */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7394c7af5de..19cf90a9c76 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1457,6 +1457,13 @@ xfs_create(
error = xfs_trans_reserve(tp, resblks, log_res, 0,
XFS_TRANS_PERM_LOG_RES, log_count);
if (error == ENOSPC) {
+ /* flush outstanding delalloc blocks and retry */
+ xfs_flush_inodes(dp);
+ error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
+ }
+ if (error == ENOSPC) {
+ /* No space at all so try a "no-allocation" reservation */
resblks = 0;
error = xfs_trans_reserve(tp, 0, log_res, 0,
XFS_TRANS_PERM_LOG_RES, log_count);