From 2c1740098c708b465e87637b237feb2fd98f129a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:27 -0500 Subject: NFS: Fix a bug in nfs_fscache_release_page() Not having an fscache cookie is perfectly valid if the user didn't mount with the fscache option. This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=15234 Signed-off-by: Trond Myklebust Acked-by: David Howells Cc: stable@kernel.org --- fs/nfs/fscache.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index fa588006588..237874f1af2 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) */ int nfs_fscache_release_page(struct page *page, gfp_t gfp) { - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - struct fscache_cookie *cookie = nfsi->fscache; - - BUG_ON(!cookie); - if (PageFsCache(page)) { + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; + + BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", cookie, page, nfsi); -- cgit v1.2.3 From 7549ad5f9b6eda49bbac4b14c5b8f37bf464f922 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:34 -0500 Subject: NFS: Remove a redundant check for PageFsCache in nfs_migrate_page() Signed-off-by: Trond Myklebust Acked-by: David Howells --- fs/nfs/write.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b54b8bb101..d63d964a039 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1598,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct nfs_page *req; int ret; - if (PageFsCache(page)) - nfs_fscache_release_page(page, GFP_KERNEL); + nfs_fscache_release_page(page, GFP_KERNEL); req = nfs_find_and_lock_request(page); ret = PTR_ERR(req); -- cgit v1.2.3 From fdcb45777a3d1689c5541e1f85ee3ebbd197d2c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 8 Feb 2010 09:32:40 -0500 Subject: NFS: Fix the mapping of the NFSERR_SERVERFAULT error It was recently pointed out that the NFSERR_SERVERFAULT error, which is designed to inform the user of a serious internal error on the server, was being mapped to an error value that is internal to the kernel. This patch maps it to the error EREMOTEIO, which is exported to userland through errno.h. Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/mount_clnt.c | 2 +- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs4xdr.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 0adefc40cc8..59047f8d7d7 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -120,7 +120,7 @@ static struct { { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, - { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, }, }; struct mountres { diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5e078b222b4..7bc2da8efd4 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -699,7 +699,7 @@ static struct { { NFSERR_BAD_COOKIE, -EBADCOOKIE }, { NFSERR_NOTSUPP, -ENOTSUPP }, { NFSERR_TOOSMALL, -ETOOSMALL }, - { NFSERR_SERVERFAULT, -ESERVERFAULT }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, { NFSERR_BADTYPE, -EBADTYPE }, { NFSERR_JUKEBOX, -EJUKEBOX }, { -1, -EIO } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e437fd6a819..5cd5184b56d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr, * If the server returns different values for sessionID, slotID or * sequence number, the server is looney tunes. */ - status = -ESERVERFAULT; + status = -EREMOTEIO; if (memcmp(id.data, res->sr_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { @@ -5774,7 +5774,7 @@ static struct { { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, { NFS4ERR_NOTSUPP, -ENOTSUPP }, { NFS4ERR_TOOSMALL, -ETOOSMALL }, - { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, { NFS4ERR_BADTYPE, -EBADTYPE }, { NFS4ERR_LOCKED, -EAGAIN }, { NFS4ERR_SYMLINK, -ELOOP }, @@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat) } if (stat <= 10000 || stat > 10100) { /* The server is looney tunes. */ - return -ESERVERFAULT; + return -EREMOTEIO; } /* If we cannot translate the error, the recovery routines should * handle it. -- cgit v1.2.3 From 65d269538a1129495ac45a14a777cd11cfe881d8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 15 Feb 2010 12:19:53 -0500 Subject: NFS: Too many GETATTR and ACCESS calls after direct I/O The cached read and write paths initialize fattr->time_start in their setup procedures. The value of fattr->time_start is propagated to read_cache_jiffies by nfs_update_inode(). Subsequent calls to nfs_attribute_timeout() will then use a good time stamp when computing the attribute cache timeout, and squelch unneeded GETATTR calls. Since the direct I/O paths erroneously leave the inode's fattr->time_start field set to zero, read_cache_jiffies for that inode is set to zero after any direct read or write operation. This triggers an otw GETATTR or ACCESS call to update the file's attribute and access caches properly, even when the NFS READ or WRITE replies have usable post-op attributes. Make sure the direct read and write setup code performs the same fattr initialization as the cached I/O paths to prevent unnecessary GETATTR calls. This was likely introduced by commit 0e574af1 in 2.6.15, which appears to add new nfs_fattr_init() call sites in the cached read and write paths, but not in the equivalent places in fs/nfs/direct.c. A subsequent commit in the same series, 33801147, introduces the fattr->time_start field. Interestingly, the direct write reschedule path already has a call to nfs_fattr_init() in the right place. Reported-by: Quentin Barnes Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e1d415e9784..0d289823e85 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.eof = 0; data->res.count = bytes; + nfs_fattr_init(&data->fattr); msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); NFS_PROTO(data->inode)->commit_setup(data, &msg); @@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->res.fattr = &data->fattr; data->res.count = bytes; data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); task_setup_data.task = &data->task; task_setup_data.callback_data = data; -- cgit v1.2.3 From 003cb608a2533d0927a83bc4e07e46d7a622eda9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:01 +0900 Subject: percpu: add __percpu sparse annotations to fs Add __percpu sparse annotations to fs. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Cc: "Theodore Ts'o" Cc: Trond Myklebust Cc: Alex Elder Cc: Christoph Hellwig Cc: Alexander Viro --- fs/nfs/iostat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 46d779abafd..1d8d5c813b0 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode, } #endif -static inline struct nfs_iostats *nfs_alloc_iostats(void) +static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); } -static inline void nfs_free_iostats(struct nfs_iostats *stats) +static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { if (stats != NULL) free_percpu(stats); -- cgit v1.2.3 From 4912002fffa377e66c5caefc2c311732a4ad5fb8 Mon Sep 17 00:00:00 2001 From: Christian Kujau Date: Fri, 26 Feb 2010 17:25:14 +0000 Subject: Remove EXPERIMENTAL from NFS_FSCACHE There's currently an open Ubuntu bug[0], with the intent to compile NFS_FSCACHE (and possibly AFS_FSCACHE, 9P_FSCACHE) into the standard Ubuntu kernel. However, since *_FSCACHE still depends on EXPERIMENTAL, this won't happen. As Arjan van de Ven pointed out[1], the EXPERIMENTAL flag doesn't mean that much any more, I propose the following patch to fs/nfs/Kconfig. I'd do the same for fs/9p/Kconfig and fs/afs/Kconfig, but as I did not test 9p or AFS, I feel it would not be appropriate for me to remove the flag. [0] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/440522/comments/5 [1] http://lkml.org/lkml/2010/1/23/145 Signed-off-by: Christian Kujau Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/nfs/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 59e5673b459..a43d07e7b92 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -95,8 +95,7 @@ config ROOT_NFS Most people say N here. config NFS_FSCACHE - bool "Provide NFS client caching support (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Provide NFS client caching support" depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y help Say Y here if you want NFS data to be cached locally on disc through -- cgit v1.2.3 From 6eae7974d0490a9dbc3091f702ea1650871652a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:44:07 -0500 Subject: Switch alloc_nfs_open_context() to struct path Signed-off-by: Al Viro --- fs/nfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f141bde7756..7570573bdb3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -574,14 +574,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) nfs_revalidate_inode(server, inode); } -static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) +static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) { struct nfs_open_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { - ctx->path.dentry = dget(dentry); - ctx->path.mnt = mntget(mnt); + ctx->path = *path; + path_get(&ctx->path); ctx->cred = get_rpccred(cred); ctx->state = NULL; ctx->lockowner = current->files; @@ -686,7 +686,7 @@ int nfs_open(struct inode *inode, struct file *filp) cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); + ctx = alloc_nfs_open_context(&filp->f_path, cred); put_rpccred(cred); if (ctx == NULL) return -ENOMEM; -- cgit v1.2.3 From f694869709cc39a5fbde21aa40f22999ddad0e6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:51:04 -0500 Subject: a couple of mntget+dget -> path_get in nfs4proc Signed-off-by: Al Viro --- fs/nfs/nfs4proc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 375f0fae2c6..84d83be25a9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -724,8 +724,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (p->o_arg.seqid == NULL) goto err_free; - p->path.mnt = mntget(path->mnt); - p->path.dentry = dget(path->dentry); + path_get(path); + p->path = *path; p->dir = parent; p->owner = sp; atomic_inc(&sp->so_count); @@ -1944,8 +1944,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; - calldata->path.mnt = mntget(path->mnt); - calldata->path.dentry = dget(path->dentry); + path_get(path); + calldata->path = *path; msg.rpc_argp = &calldata->arg, msg.rpc_resp = &calldata->res, -- cgit v1.2.3 From 26821ed40b4230259e770c9911180f38fcaa6f59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:21 +0100 Subject: make sure data is on disk before calling ->write_inode Similar to the fsync issue fixed a while ago in commit 2daea67e966dc0c42067ebea015ddac6834cef88 we need to write for data to actually hit the disk before writing out the metadata to guarantee data integrity for filesystems that modify the inode in the data I/O completion path. Currently XFS and NFS handle this manually, and AFS has a write_inode method that does nothing but waiting for data, while others are possibly missing out on this. Fortunately this change has a lot less impact than the fsync change as none of the write_inode methods starts data writeout of any form by itself. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7570573bdb3..5ecd952cae1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -101,12 +101,7 @@ int nfs_write_inode(struct inode *inode, int sync) { int ret; - if (sync) { - ret = filemap_fdatawait(inode->i_mapping); - if (ret == 0) - ret = nfs_commit_inode(inode, FLUSH_SYNC); - } else - ret = nfs_commit_inode(inode, 0); + ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfs/inode.c | 5 +++-- fs/nfs/internal.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ecd952cae1..7f9ecc46f3f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,11 +97,12 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e464d23b3..11f82f03c5d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); -extern int nfs_write_inode(struct inode *,int); +extern int nfs_write_inode(struct inode *, struct writeback_control *); extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); -- cgit v1.2.3 From 8fc795f703c5138e1a8bfb88c69f52632031aa6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:46:56 -0800 Subject: NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c The sole purpose of nfs_write_inode is to commit unstable writes, so move it into fs/nfs/write.c, and make nfs_commit_inode static. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 ------------ fs/nfs/write.c | 24 +++++++++++++++++++++++- 2 files changed, 23 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f9ecc46f3f..89e98312599 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret; - - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) - return 0; - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; -} - void nfs_clear_inode(struct inode *inode) { /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d63d964a039..09e97097baa 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -int nfs_commit_inode(struct inode *inode, int how) +static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int res; @@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how) } return res; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + int ret; + + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); + if (ret >= 0) + return 0; + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; +} #else static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { return 0; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + return 0; +} #endif +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return nfs_commit_unstable_pages(inode, wbc); +} + long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; -- cgit v1.2.3 From ff778d02bf867e1733a09b34ad6dbb723b024814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:53:39 -0800 Subject: NFS: Add a count of the number of unstable writes carried by an inode In order to know when we should do opportunistic commits of the unstable writes, when the VM is doing a background flush, we add a field to count the number of unstable writes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/write.c | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 89e98312599..aa5a831001a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1404,6 +1404,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; + nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 09e97097baa..dc08a6fbde6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -438,6 +438,7 @@ nfs_mark_request_commit(struct nfs_page *req) radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); + nfsi->ncommit++; spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -573,11 +574,15 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); + int ret; if (!nfs_need_commit(nfsi)) return 0; - return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; + return ret; } #else static inline int nfs_need_commit(struct nfs_inode *nfsi) @@ -642,9 +647,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req)) - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT); + if (nfs_clear_request_commit(req) && + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + NFS_I(inode)->ncommit--; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { -- cgit v1.2.3 From 420e3646bb7d93a571734034249fbb1ae1a7a5c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:00:02 -0800 Subject: NFS: Reduce the number of unnecessary COMMIT calls If the caller is doing a non-blocking flush, and there are still writebacks pending on the wire, we can usually defer the COMMIT call until those writes are done. Also ensure that we honour the wbc->nonblocking flag. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc08a6fbde6..fc05e35da6a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1415,12 +1415,30 @@ static int nfs_commit_inode(struct inode *inode, int how) static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) { - int ret; + struct nfs_inode *nfsi = NFS_I(inode); + int flags = FLUSH_SYNC; + int ret = 0; - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); - if (ret >= 0) + /* Don't commit yet if this is a non-blocking flush and there are + * lots of outstanding writes for this mapping. + */ + if (wbc->sync_mode == WB_SYNC_NONE && + nfsi->ncommit <= (nfsi->npages >> 1)) + goto out_mark_dirty; + + if (wbc->nonblocking) + flags = 0; + ret = nfs_commit_inode(inode, flags); + if (ret >= 0) { + if (wbc->sync_mode == WB_SYNC_NONE) { + if (ret < wbc->nr_to_write) + wbc->nr_to_write -= ret; + else + wbc->nr_to_write = 0; + } return 0; + } +out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -- cgit v1.2.3 From 5bad5abec4058c5214bfc72cec418348d6747977 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:02:24 -0800 Subject: NFS: Run COMMIT as an asynchronous RPC call when wbc->for_background is set Signed-off-by: Trond Myklebust Acked-by: Peter Zijlstra Acked-by: Wu Fengguang --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fc05e35da6a..704e67d392e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1426,7 +1426,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr nfsi->ncommit <= (nfsi->npages >> 1)) goto out_mark_dirty; - if (wbc->nonblocking) + if (wbc->nonblocking || wbc->for_background) flags = 0; ret = nfs_commit_inode(inode, flags); if (ret >= 0) { -- cgit v1.2.3 From 2928db1ffeacc9717c2d5c230d450bcc377b3ae9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:18 -0800 Subject: NFS: Ensure inode is always marked I_DIRTY_DATASYNC, if it has unstable pages Since nfs_scan_list() doesn't wait for locked pages, we have a race in which it is possible to end up with an inode that needs to send a COMMIT, but which does not have the I_DIRTY_DATASYNC flag set. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 704e67d392e..e40e949598f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -582,6 +582,8 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); if (ret > 0) nfsi->ncommit -= ret; + if (nfs_need_commit(NFS_I(inode))) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } #else -- cgit v1.2.3 From c988950eb6dd6f8e6d98503ca094622729e9aa13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:21 -0800 Subject: NFS: Simplify nfs_wb_page_cancel() In all cases we should be able to just remove the request and call cancel_dirty_page(). Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e40e949598f..dc7f5e9a23b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -540,19 +540,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u return res; } -static void nfs_cancel_commit_list(struct list_head *head) -{ - struct nfs_page *req; - - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - nfs_unlock_request(req); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1495,13 +1482,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; - if (how & FLUSH_INVALIDATE) { - spin_unlock(&inode->i_lock); - nfs_cancel_commit_list(&head); - ret = pages; - spin_lock(&inode->i_lock); - continue; - } pages += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); ret = nfs_commit_list(inode, &head, how); @@ -1558,26 +1538,13 @@ int nfs_wb_nocommit(struct inode *inode) int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; - loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); - struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - }; int ret = 0; BUG_ON(!PageLocked(page)); for (;;) { req = nfs_page_find_request(page); if (req == NULL) - goto out; - if (test_bit(PG_CLEAN, &req->wb_flags)) { - nfs_release_request(req); break; - } if (nfs_lock_request_dontget(req)) { nfs_inode_remove_request(req); /* @@ -1591,12 +1558,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret < 0) - goto out; + break; } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); -out: return ret; } -- cgit v1.2.3 From acdc53b2146c7ee67feb1f02f7bc3020126514b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:26 -0800 Subject: NFS: Replace __nfs_write_mapping with sync_inode() Now that we have correct COMMIT semantics in writeback_single_inode, we can reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a call to filemap_write_and_wait(), which doesn't need to hold the inode->i_mutex. With that done, we can eliminate nfs_write_mapping() altogether. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++---------- fs/nfs/write.c | 42 +++++------------------------------------- 2 files changed, 10 insertions(+), 47 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa5a831001a..443772df9b1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* - * Flush out writes to the server in order to update c/mtime. - * - * Hold the i_mutex to suspend application writes temporarily; - * this prevents long-running writing applications from blocking - * nfs_wb_nocommit. - */ + /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - mutex_lock(&inode->i_mutex); - nfs_wb_nocommit(inode); - mutex_unlock(&inode->i_mutex); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; } /* @@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } +out: return err; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc7f5e9a23b..0b323091b48 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1454,7 +1454,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); - int nocommit = how & FLUSH_NOCOMMIT; long pages, ret; /* FIXME */ @@ -1471,14 +1470,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr npages = 0; } } - how &= ~FLUSH_NOCOMMIT; spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; - if (nocommit) - break; pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; @@ -1492,47 +1488,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - int ret; - - ret = nfs_writepages(mapping, wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, wbc, how); - if (ret < 0) - goto out; - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; -} - -/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ -static int nfs_write_mapping(struct address_space *mapping, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, }; - return __nfs_write_mapping(mapping, &wbc, how); -} - -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, 0); -} - -int nfs_wb_nocommit(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); + return sync_inode(inode, &wbc); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) -- cgit v1.2.3 From 7f2f12d963e7c33a93bfb0b22f0178eb1e6a4196 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:28 -0800 Subject: NFS: Simplify nfs_wb_page() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 120 +++++++++++---------------------------------------------- 1 file changed, 23 insertions(+), 97 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0b323091b48..53ff70e2399 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -502,44 +502,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #endif -/* - * Wait for a request to complete. - * - * Interruptible by fatal signals only. - */ -static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req; - pgoff_t idx_end, next; - unsigned int res = 0; - int error; - - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { - if (req->wb_index > idx_end) - break; - - next = req->wb_index + 1; - BUG_ON(!NFS_WBACK_BUSY(req)); - - kref_get(&req->wb_kref); - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - spin_lock(&inode->i_lock); - if (error < 0) - return error; - res++; - } - return res; -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1432,7 +1394,7 @@ out_mark_dirty: return ret; } #else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +static int nfs_commit_inode(struct inode *inode, int how) { return 0; } @@ -1448,46 +1410,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) return nfs_commit_unstable_pages(inode, wbc); } -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - struct inode *inode = mapping->host; - pgoff_t idx_start, idx_end; - unsigned int npages = 0; - LIST_HEAD(head); - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - pgoff_t l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (pgoff_t)npages != l_npages) - npages = 0; - } - } - spin_lock(&inode->i_lock); - do { - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - if (ret != 0) - continue; - pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) - break; - pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&inode->i_lock); - ret = nfs_commit_list(inode, &head, how); - spin_lock(&inode->i_lock); - - } while (ret >= 0); - spin_unlock(&inode->i_lock); - return ret; -} - /* * flush the inode to disk. */ @@ -1531,45 +1453,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) return ret; } -static int nfs_wb_page_priority(struct inode *inode, struct page *page, - int how) +/* + * Write back all requests on one page - we do this before reading it. + */ +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, + .nr_to_write = 0, .range_start = range_start, .range_end = range_end, }; + struct nfs_page *req; + int need_commit; int ret; - do { + while(PagePrivate(page)) { if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; - } else if (!PagePrivate(page)) + } + req = nfs_find_and_lock_request(page); + if (!req) break; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret < 0) + if (IS_ERR(req)) { + ret = PTR_ERR(req); goto out_error; - } while (PagePrivate(page)); + } + need_commit = test_bit(PG_CLEAN, &req->wb_flags); + nfs_clear_page_tag_locked(req); + if (need_commit) { + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + goto out_error; + } + } return 0; out_error: - __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) -{ - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) -- cgit v1.2.3 From 5cf95214ccb915591e2214f81de4659302d3e452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:29 -0800 Subject: NFS: Clean up nfs_sync_mapping Remove the redundant call to filemap_write_and_wait(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 443772df9b1..e8b41170d29 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -114,16 +114,12 @@ void nfs_clear_inode(struct inode *inode) */ int nfs_sync_mapping(struct address_space *mapping) { - int ret; + int ret = 0; - if (mapping->nrpages == 0) - return 0; - unmap_mapping_range(mapping, 0, 0, 0); - ret = filemap_write_and_wait(mapping); - if (ret != 0) - goto out; - ret = nfs_wb_all(mapping->host); -out: + if (mapping->nrpages != 0) { + unmap_mapping_range(mapping, 0, 0, 0); + ret = nfs_wb_all(mapping->host); + } return ret; } -- cgit v1.2.3 From 1cda707d52e51a6cafac0aef12d2bd7052d572e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:30 -0800 Subject: NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 41 +---------------------------------------- fs/nfs/symlink.c | 2 +- 3 files changed, 3 insertions(+), 42 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3c7f03b669f..a1f6b4438fb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->entry = &my_entry; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8b41170d29..dbaaf7d2a18 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -754,7 +754,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } -static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -775,49 +775,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa return 0; } -static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) -{ - int ret = 0; - - mutex_lock(&inode->i_mutex); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { - ret = nfs_sync_mapping(mapping); - if (ret == 0) - ret = nfs_invalidate_mapping_nolock(inode, mapping); - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -/** - * nfs_revalidate_mapping_nolock - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; - - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping_nolock(inode, mapping); -out: - return ret; -} - /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * - * This version of the function will take the inode->i_mutex and attempt to - * flush out all dirty data if it needs to invalidate the page cache. */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 412738dbfbc..2ea9e5c27e5 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) struct page *page; void *err; - err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); + err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, -- cgit v1.2.3