From 0f585f14d4e34ab701283e9237ac7695cd7c9e31 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 12 Jan 2010 03:36:57 +0900 Subject: GFS2: Fix refcnt leak on gfs2_follow_link() error path If the ->follow_link handler returns an error, it should decrement the nd->path refcnt. This patch fixes it. Signed-off-by: OGAWA Hirofumi Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 78f73ca1ef3..84350e1be66 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1088,7 +1088,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) error = vfs_follow_link(nd, buf); if (buf != array) kfree(buf); - } + } else + path_put(&nd->path); return ERR_PTR(error); } -- cgit v1.2.3 From c9edda7140ec6a22accf7f2f86da362dfbfd41fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:41:34 -0500 Subject: NFS: Fix a reference leak in nfs_wb_page_cancel() Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/write.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d171696017f..dac8d7676af 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) break; } ret = nfs_wait_on_request(req); + nfs_release_request(req); if (ret < 0) goto out; } -- cgit v1.2.3 From 82be934a59ff891cac598727e5a862ba2b9d1fac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:41:53 -0500 Subject: NFS: Try to commit unstable writes in nfs_release_page() If someone calls nfs_release_page(), we presumably already know that the page is clean, however it may be holding an unstable write.
Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6b891328f33..63f2071d644 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp) { dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); + if (gfp & __GFP_WAIT) + nfs_wb_page(page->mapping->host, page); /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; -- cgit v1.2.3 From 0aa05887af728b058af91197f0ae9b3ae63dd74a Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 26 Jan 2010 15:42:03 -0500 Subject: NFS: Make nfs_commitdata_release static The symbol nfs_commitdata_release is only used locally in this file. Make it static to prevent the following sparse warning: warning: symbol 'nfs_commitdata_release' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Cc: Trond Myklebust Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dac8d7676af..7b54b8bb101 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commitdata_release(void *data) +static void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; -- cgit v1.2.3 From b0706ca415b188ed58788420de4d5c9972b2afb2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 26 Jan 2010 15:42:11 -0500 Subject: NFS: Avoid warnings when CONFIG_NFS_V4=n Avoid the following warnings when CONFIG_NFS_V4=n: fs/nfs/sysctl.c:19: warning: unused variable `nfs_set_port_max' fs/nfs/sysctl.c:18: warning: unused variable `nfs_set_port_min' by making those variables contingent on NFSv4 being 
configured. Signed-off-by: David Howells Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/sysctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 70e1fbbaaea..ad4d2e787b2 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -15,8 +15,10 @@ #include "callback.h" +#ifdef CONFIG_NFS_V4 static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; +#endif static struct ctl_table_header *nfs_callback_sysctl_table; static ctl_table nfs_cb_sysctls[] = { -- cgit v1.2.3 From 2bee72a6aa1e6d0a4f5da56217f0d0bbbdd0d9a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:21 -0500 Subject: NFSv4: Ensure that the NFSv4 locking can recover from stateid errors In most cases, we just want to mark the lock_stateid sequence id as being uninitialised. Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/nfs4proc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 198d51d17c1..0b68238ed0c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4088,6 +4088,22 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = { .rpc_release = nfs4_lock_release, }; +static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = lsp->ls_state; + + switch (error) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_nograce(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + }; +} + static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) { struct nfs4_lockdata *data; @@ -4126,6 +4142,9 @@ static int _nfs4_do_setlk(struct nfs4_state 
*state, int cmd, struct file_lock *f ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; + if (ret) + nfs4_handle_setlk_error(data->server, data->lsp, + data->arg.new_lock_owner, ret); } else data->cancelled = 1; rpc_put_task(task); -- cgit v1.2.3 From 8e469ebd6dc32cbaf620e134d79f740bf0ebab79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:30 -0500 Subject: NFSv4: Don't allow posix locking against servers that don't support it Signed-off-by: Trond Myklebust Cc: stable@kernel.org Reviewed-by: Chuck Lever --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 865265bdca0..ea2f41b26ae 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -146,6 +146,7 @@ enum { NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ + NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ }; struct nfs4_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0b68238ed0c..be044b58e81 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1658,6 +1658,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; + if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; @@ -4200,8 +4202,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock { struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; - int status; + int status = -ENOLCK; + if ((fl_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + goto out; /* Is this a delegated open? 
*/ status = nfs4_set_lock_state(state, request); if (status != 0) -- cgit v1.2.3 From 03391693a95900875b0973569d2d73ff3aa8972e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:38 -0500 Subject: NFSv4.1: Don't call nfs4_schedule_state_recovery() unnecessarily Currently, nfs4_handle_exception() will call it twice if called with an error of -NFS4ERR_STALE_CLIENTID, -NFS4ERR_STALE_STATEID or -NFS4ERR_EXPIRED. Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be044b58e81..afbfe673489 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -256,12 +256,8 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, ret = nfs4_wait_clnt_recover(clp); if (ret == 0) exception->retry = 1; -#if !defined(CONFIG_NFS_V4_1) break; -#else /* !defined(CONFIG_NFS_V4_1) */ - if (!nfs4_has_session(server->nfs_client)) - break; - /* FALLTHROUGH */ +#if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: @@ -274,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, nfs4_schedule_state_recovery(clp); exception->retry = 1; break; -#endif /* !defined(CONFIG_NFS_V4_1) */ +#endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { /* We have retried a decent amount, time to -- cgit v1.2.3 From a2c0b9e291208f65221a0ad8a0c80a377707d480 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jan 2010 15:42:47 -0500 Subject: NFS: Ensure that we handle NFS4ERR_STALE_STATEID correctly Even if the server is crazy, we should be able to mark the stateid as being bad, to ensure it gets recovered. 
Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 44 +++++++++++++++++++++++++++++++------------- fs/nfs/nfs4state.c | 2 +- 3 files changed, 33 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ea2f41b26ae..0c6fda33d66 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -278,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); +extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index afbfe673489..375f0fae2c6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -249,14 +249,14 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, if (state == NULL) break; nfs4_state_mark_reclaim_nograce(clp, state); - case -NFS4ERR_STALE_CLIENTID: + goto do_state_recovery; case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(clp); - ret = nfs4_wait_clnt_recover(clp); - if (ret == 0) - exception->retry = 1; - break; + goto do_state_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -289,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, } /* We failed to handle the error */ return nfs4_map_errors(ret); +do_state_recovery: + nfs4_schedule_state_recovery(clp); + ret = 
nfs4_wait_clnt_recover(clp); + if (ret == 0) + exception->retry = 1; + return ret; } @@ -3420,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (state == NULL) break; nfs4_state_mark_reclaim_nograce(clp, state); - case -NFS4ERR_STALE_CLIENTID: + goto do_state_recovery; case -NFS4ERR_STALE_STATEID: + if (state == NULL) + break; + nfs4_state_mark_reclaim_reboot(clp, state); + case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - task->tk_status = 0; - return -EAGAIN; + goto do_state_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -3456,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, } task->tk_status = nfs4_map_errors(task->tk_status); return 0; +do_state_recovery: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + task->tk_status = 0; + return -EAGAIN; } static int @@ -4099,6 +4111,12 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) nfs4_state_mark_reclaim_nograce(clp, state); lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; + break; + case -NFS4ERR_STALE_STATEID: + if (new_lock_owner != 0 || + (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + nfs4_state_mark_reclaim_reboot(clp, state); + lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; }; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6d263ed79e9..c1e2733f4fa 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -901,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -static int 
nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) +int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); -- cgit v1.2.3 From 1d6165851cd8e3f919d446cd6da35dee44e8837e Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 27 Jan 2010 22:44:36 +0300 Subject: block: fix bio_add_page for non trivial merge_bvec_fn case We have to properly decrease bi_size in order for merge_bvec_fn to return the right result. Otherwise this results in false merge rejects for two absolutely valid bio_vecs. This may cause a significant performance penalty, for example when fs_block_size == 1k and the block device is raid0 with a small chunk_size = 8k. Then it is impossible to merge the 7th fs-block into a bio which already has 6 fs-blocks. Cc: Signed-off-by: Dmitry Monakhov Signed-off-by: Jens Axboe --- fs/bio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 12429c9553e..88094afc29e 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -542,13 +542,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page if (page == prev->bv_page && offset == prev->bv_offset + prev->bv_len) { + unsigned int prev_bv_len = prev->bv_len; prev->bv_len += len; if (q->merge_bvec_fn) { struct bvec_merge_data bvm = { + /* prev_bvec is already charged in + bi_size, discharge it in order to + simulate merging updated prev_bvec + as new bvec.
*/ .bi_bdev = bio->bi_bdev, .bi_sector = bio->bi_sector, - .bi_size = bio->bi_size, + .bi_size = bio->bi_size - prev_bv_len, .bi_rw = bio->bi_rw, }; -- cgit v1.2.3 From 9e9432c267e4047db98b9d4fba95099c6effcef9 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Sat, 30 Jan 2010 20:28:19 +0100 Subject: block: fix bugs in bio-integrity mempool usage Fix two bugs in the bio integrity code: use_bip_pool() always returns 0 because it checks against the wrong limit, causing the mempool to be used only when regular allocation fails. When the mempool is used as a fallback we don't free the data properly. Signed-Off-By: Chuck Ebbert Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 49a34e7f730..a16f29e888c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -61,7 +61,7 @@ static inline unsigned int vecs_to_idx(unsigned int nr) static inline int use_bip_pool(unsigned int idx) { - if (idx == BIOVEC_NR_POOLS) + if (idx == BIOVEC_MAX_IDX) return 1; return 0; @@ -95,6 +95,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, /* Use mempool if lower order alloc failed or max vecs were requested */ if (bip == NULL) { + idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */ bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); if (unlikely(bip == NULL)) { -- cgit v1.2.3 From 3256a05531b1164a9c138da701b922a113bddf82 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 31 Jan 2010 12:39:50 +0900 Subject: nilfs2: fix potential leak of dirty data on umount This fixes incorrect usage of nilfs_segctor_confirm() test function in nilfs_segctor_destroy(); nilfs_segctor_confirm() returns zero if the filesystem is not clean, so its use in nilfs_segctor_destroy() needs inversion. 
Signed-off-by: Ryusuke Konishi --- fs/nilfs2/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 17584c52448..105b508b47a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); - if (flag || nilfs_segctor_confirm(sci)) + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); WARN_ON(!list_empty(&sci->sc_copied_buffers)); -- cgit v1.2.3 From 55f0b4c546d1c87cccba63dc0fc5eb70e2b41733 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Mon, 25 Jan 2010 11:23:24 -0600 Subject: GFS2: Don't withdraw on partial rindex entries Since gfs2 writes the rindex file a block at a time, and releases the exclusive lock after each block, it is possible that another process will grab the lock in the middle of the write. Since rindex entries are not an even divisor of blocks, that other process may see partial entries. On grows, this is fine. The process can simply ignore the partial entries. Previously, the code withdrew when it saw partial entries. Now it simply ignores them.
Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0608f490c29..6702b82b375 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -591,11 +591,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip) u64 rgrp_count = ip->i_disksize; int error; - if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) { - gfs2_consist_inode(ip); - return -EIO; - } - clear_rgrpdi(sdp); file_ra_state_init(&ra_state, inode->i_mapping); -- cgit v1.2.3 From 7fe3ec6fe58d2bfe97fe7a5d731c29299a8ffd35 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 29 Jan 2010 15:20:34 +0000 Subject: GFS2: Fix previous patch The do_div() call needs to remain. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6702b82b375..46534a554cc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -591,6 +591,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip) u64 rgrp_count = ip->i_disksize; int error; + do_div(rgrp_count, sizeof(struct gfs2_rindex)); clear_rgrpdi(sdp); file_ra_state_init(&ra_state, inode->i_mapping); -- cgit v1.2.3 From ea8d62dadd0217334fb2c5d60e7f89e14076ca10 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 29 Jan 2010 15:48:57 +0000 Subject: GFS2: Use GFP_NOFS for alloc structure This is called under a glock, so its a good plan to use GFP_NOFS Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 46534a554cc..503b842f3ba 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -911,7 +911,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) { BUG_ON(ip->i_alloc != NULL); - ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); + ip->i_alloc = 
kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); return ip->i_alloc; } -- cgit v1.2.3 From 7ab02af428c2d312c0cf8fb0b01cc1eb21131a3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 2 Feb 2010 12:37:44 -0800 Subject: Fix 'flush_old_exec()/setup_new_exec()' split Commit 221af7f87b9 ("Split 'flush_old_exec' into two functions") split the function at the point of no return - ie right where there were no more error cases to check. That made sense from a technical standpoint, but when we then also combined it with the actual personality setting going in between flush_old_exec() and setup_new_exec(), it needs to be a bit more careful. In particular, we need to make sure that we really flush the old personality bits in the 'flush' stage, rather than later in the 'setup' stage, since otherwise we might be flushing the _new_ personality state that we're just setting up. So this moves the flags and personality flushing (and 'flush_thread()', which is the arch-specific function that generally resets lazy FP state etc) of the old process into flush_old_exec(), so that it doesn't affect any state that execve() is setting up for the new process environment. This was reported by Michal Simek as breaking his Microblaze qemu environment. Reported-and-tested-by: Michal Simek Cc: Peter Anvin Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 675c3f44c2e..0790a107ff7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -961,6 +961,11 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; bprm->mm = NULL; /* We're using it now */ + + current->flags &= ~PF_RANDOMIZE; + flush_thread(); + current->personality &= ~bprm->per_clear; + return 0; out: @@ -997,9 +1002,6 @@ void setup_new_exec(struct linux_binprm * bprm) tcomm[i] = '\0'; set_task_comm(current, tcomm); - current->flags &= ~PF_RANDOMIZE; - flush_thread(); - /* Set the new mm task size. 
We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on * some architectures like powerpc @@ -1015,8 +1017,6 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - current->personality &= ~bprm->per_clear; - /* * Flush performance counters when crossing a * security domain: -- cgit v1.2.3 From 931e80e4b3263db75c8e34f078d22f11bbabd3a3 Mon Sep 17 00:00:00 2001 From: anfei zhou Date: Tue, 2 Feb 2010 13:44:02 -0800 Subject: mm: flush dcache before writing into page to avoid alias The cache alias problem will happen if the changes of user shared mapping is not flushed before copying, then user and kernel mapping may be mapped into two different cache line, it is impossible to guarantee the coherence after iov_iter_copy_from_user_atomic. So the right steps should be: flush_dcache_page(page); kmap_atomic(page); write to page; kunmap_atomic(page); flush_dcache_page(page); More precisely, we might create two new APIs flush_dcache_user_page and flush_dcache_kern_page to replace the two flush_dcache_page accordingly. Here is a snippet tested on omap2430 with VIPT cache, and I think it is not ARM-specific: int val = 0x11111111; fd = open("abc", O_RDWR); addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); *(addr+0) = 0x44444444; tmp = *(addr+0); *(addr+1) = 0x77777777; write(fd, &val, sizeof(int)); close(fd); The results are not always 0x11111111 0x77777777 at the beginning as expected. Sometimes we see 0x44444444 0x77777777. 
Signed-off-by: Anfei Cc: Russell King Cc: Miklos Szeredi Cc: Nick Piggin Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c18913a777a..a9f5e137f1d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -828,6 +828,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (!page) break; + if (mapping_writably_mapped(mapping)) + flush_dcache_page(page); + pagefault_disable(); tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); pagefault_enable(); -- cgit v1.2.3 From e402746a945ceb9d0486a8e3d5917c9228fa4404 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 25 Jan 2010 11:20:19 +0000 Subject: GFS2: Wait for unlock completion on umount This patch adds a wait on umount between the point at which we dispose of all glocks and the point at which we unmount the lock protocol. This ensures that we've received all the replies to our unlock requests before we stop the locking. Signed-off-by: Steven Whitehouse Reported-by: Fabio M. 
Di Nitto --- fs/gfs2/incore.h | 2 ++ fs/gfs2/lock_dlm.c | 7 ++++++- fs/gfs2/ops_fstype.c | 2 ++ fs/gfs2/super.c | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 4792200978c..bc0ad158e6b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -544,6 +544,8 @@ struct gfs2_sbd { struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_trans_gl; + wait_queue_head_t sd_glock_wait; + atomic_t sd_glock_disposal; /* Inode Stuff */ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 46df988323b..cdd0755d782 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -21,6 +21,7 @@ static void gdlm_ast(void *arg) { struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; + struct gfs2_sbd *sdp = gl->gl_sbd; BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -30,6 +31,8 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ kmem_cache_free(gfs2_glock_cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); return; case -DLM_ECANCEL: /* Cancel while getting lock */ ret |= LM_OUT_CANCELED; @@ -167,7 +170,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) { struct gfs2_glock *gl = ptr; - struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if (gl->gl_lksb.sb_lkid == 0) { @@ -183,6 +187,7 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) (unsigned long long)gl->gl_name.ln_number, error); return; } + atomic_inc(&sdp->sd_glock_disposal); } static void gdlm_cancel(struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index edfee24f363..9390fc7d8d4 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -82,6 +82,8 @@ static struct gfs2_sbd 
*init_sbd(struct super_block *sb) gfs2_tune_init(&sdp->sd_tune); + init_waitqueue_head(&sdp->sd_glock_wait); + atomic_set(&sdp->sd_glock_disposal, 0); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c282ad41f3d..66242b32db5 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -860,6 +861,8 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + /* Wait for dlm to reply to all our unlock requests */ + wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); -- cgit v1.2.3 From 8f05228ee7c8f409ae3c6f9c3e13d7ccb9c18360 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 29 Jan 2010 15:21:27 +0000 Subject: GFS2: Extend umount wait coverage to full glock lifetime Although all glocks are, by the time of the umount glock wait, scheduled for demotion, some of them haven't made it far enough through the process for the original set of waiting code to wait for them. This extends the ref count to the whole glock lifetime in order to ensure that the waiting does catch all glocks. It does make it a bit more invasive, but it seems the only sensible solution at the moment. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++++ fs/gfs2/glock.h | 2 +- fs/gfs2/lock_dlm.c | 6 +++--- fs/gfs2/ops_fstype.c | 10 +++++++++- fs/gfs2/super.c | 2 -- 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f455a03a09e..f4266332593 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (!gl) return -ENOMEM; + atomic_inc(&sdp->sd_glock_disposal); gl->gl_flags = 0; gl->gl_name = name; atomic_set(&gl->gl_ref, 1); @@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) up_write(&gfs2_umount_flush_sem); msleep(10); } + flush_workqueue(glock_workqueue); + wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); + gfs2_dump_lockstate(sdp); } void gfs2_glock_finish_truncate(struct gfs2_inode *ip) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 13f0bd22813..c0262faf472 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -123,7 +123,7 @@ struct lm_lockops { int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); void (*lm_unmount) (struct gfs2_sbd *sdp); void (*lm_withdraw) (struct gfs2_sbd *sdp); - void (*lm_put_lock) (struct kmem_cache *cachep, void *gl); + void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); unsigned int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, unsigned int flags); void (*lm_cancel) (struct gfs2_glock *gl); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index cdd0755d782..0e5e0e7022e 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -167,15 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl, return LM_OUT_ASYNC; } -static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) +static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) { - struct gfs2_glock *gl = ptr; struct gfs2_sbd *sdp = gl->gl_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; if 
(gl->gl_lksb.sb_lkid == 0) { kmem_cache_free(cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); return; } @@ -187,7 +188,6 @@ static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr) (unsigned long long)gl->gl_name.ln_number, error); return; } - atomic_inc(&sdp->sd_glock_disposal); } static void gdlm_cancel(struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9390fc7d8d4..8a102f73100 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -985,9 +985,17 @@ static const match_table_t nolock_tokens = { { Opt_err, NULL }, }; +static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + kmem_cache_free(cachep, gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); +} + static const struct lm_lockops nolock_ops = { .lm_proto_name = "lock_nolock", - .lm_put_lock = kmem_cache_free, + .lm_put_lock = nolock_put_lock, .lm_tokens = &nolock_tokens, }; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 66242b32db5..b9dd3da22c0 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -861,8 +861,6 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); - /* Wait for dlm to reply to all our unlock requests */ - wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); -- cgit v1.2.3 From 9f557cd8073104b39528794d44e129331ded649f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Feb 2010 08:27:22 -0500 Subject: NFS: Fix an Oops when truncating a file The VM/VFS does not allow mapping->a_ops->invalidatepage() to fail. Unfortunately, nfs_wb_page_cancel() may fail if a fatal signal occurs. Since the NFS code assumes that the page stays mapped for as long as the writeback is active, we can end up Oopsing (among other things). 
The only safe fix here is to convert nfs_wait_on_request(), so as to make it uninterruptible (as is already the case with wait_on_page_writeback()). Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/pagelist.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e2975939126..a12c45b65dd 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_free_request); } +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. @@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { - int ret = 0; - - if (!test_bit(PG_BUSY, &req->wb_flags)) - goto out; - ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, - nfs_wait_bit_killable, TASK_KILLABLE); -out: - return ret; + return wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); } /** -- cgit v1.2.3 From 387c149b54b4321cbc790dadbd4f8eedb5a90468 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Feb 2010 08:27:35 -0500 Subject: NFS: Fix a umount race Ensure that we unregister the bdi before kill_anon_super() calls ida_remove() on our device name. 
Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/super.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ce907efc550..f1afee4eea7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -243,6 +243,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *); static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static void nfs_put_super(struct super_block *); static void nfs_kill_super(struct super_block *); static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); @@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, + .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, @@ -2257,6 +2260,17 @@ error_splat_super: goto out; } +/* + * Ensure that we unregister the bdi before kill_anon_super + * releases the device name + */ +static void nfs_put_super(struct super_block *s) +{ + struct nfs_server *server = NFS_SB(s); + + bdi_unregister(&server->backing_dev_info); +} + /* * Destroy an NFS2/3 superblock */ @@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s) struct nfs_server *server = NFS_SB(s); kill_anon_super(s); - bdi_unregister(&server->backing_dev_info); nfs_fscache_release_super_cookie(s); nfs_free_server(server); } -- cgit v1.2.3 From 9b4b351346b41d923d69adec865814fdaac4dba9 Mon Sep 17 
00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Feb 2010 08:27:35 -0500 Subject: NFS: Don't clobber the attribute type in nfs_update_inode() If the NFS_ATTR_FATTR_TYPE field isn't set in fattr->valid, then we should not set the S_IFMT part of inode->i_mode. Reported-by: Al Viro Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index faa091865ad..f141bde7756 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { + umode_t newmode = inode->i_mode & S_IFMT; + newmode |= fattr->mode & S_IALLUGO; + inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - inode->i_mode = fattr->mode; } } else if (server->caps & NFS_CAP_MODE) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR -- cgit v1.2.3