From fd9db7297749c05fcf5721ce5393a5a8b8772f2a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:35 +0200 Subject: fuse: destroy bdi on error Destroy bdi on error in fuse_fill_super(). This was an omission from commit 26c3679101dbccc054dcf370143941844ba70531 "fuse: destroy bdi on umount", which moved the bdi_destroy() call from fuse_conn_put() to fuse_put_super(). Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 459b73dd45e..75ca5ac603a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -908,6 +908,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_put_root: dput(root_dentry); err_put_conn: + bdi_destroy(&fc->bdi); fuse_conn_put(fc); err_fput: fput(file); -- cgit v1.2.3 From ccecee1e5e42981f5eb37f4411e8552b9db04d30 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 1 May 2009 11:33:53 -0400 Subject: nfsd41: slots are freed with session The session and slots are allocated all in one piece. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c65a27b76a9..3b711f5147a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -580,7 +580,6 @@ free_session(struct kref *kref) struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; nfsd4_release_respages(e->ce_respages, e->ce_resused); } - kfree(ses->se_slots); kfree(ses); } -- cgit v1.2.3 From b2c0cea6b1cb210e962f07047df602875564069e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 5 May 2009 19:04:29 -0400 Subject: nfsd4: check for negative dentry before use in nfsv4 readdir After 2f9092e1020246168b1309b35e085ecd7ff9ff72 "Fix i_mutex vs. readdir handling in nfsd" (and 14f7dd63 "Copy XFS readdir hack into nfsd code"), an entry may be removed between the first mutex_unlock and the second mutex_lock. In this case, lookup_one_len() will return a negative dentry. Check for this case to avoid a NULL dereference. Signed-off-by: J. Bruce Fields Reviewed-by: J. R. Okajima Cc: stable@kernel.org --- fs/nfsd/nfs4xdr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b820c311931..b73549d293b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2214,6 +2214,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); + if (!dentry->d_inode) { + /* + * nfsd_buffered_readdir drops the i_mutex between + * readdir and calling this callback, leaving a window + * where this directory entry could have gone away. + */ + dput(dentry); + return nfserr_noent; + } exp_get(exp); /* @@ -2276,6 +2285,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; __be32 *p = cd->buffer; + __be32 *cookiep; __be32 nfserr = nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. */ @@ -2292,7 +2302,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; *p++ = xdr_one; /* mark entry present */ - cd->offset = p; /* remember pointer */ + cookiep = p; p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ @@ -2306,6 +2316,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; case nfserr_dropit: goto fail; + case nfserr_noent: + goto skip_entry; default: /* * If the client requested the RDATTR_ERROR attribute, @@ -2324,6 +2336,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, } cd->buflen -= (p - cd->buffer); cd->buffer = p; + cd->offset = cookiep; +skip_entry: cd->common.err = nfs_ok; return 0; fail: -- cgit v1.2.3 From 89996df4b5b1a09c279f50b3fd03aa9df735f5cb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 6 May 2009 16:32:54 -0400 Subject: lockd: fix list corruption on lockd restart If lockd is signalled soon enough after restart then locks_start_grace() will try to re-add an entry to a list and trigger a lock corruption warning. Thanks to Wang Chen for the problem report and diagnosis. WARNING: at lib/list_debug.c:26 __list_add+0x27/0x5c() ... list_add corruption. next->prev should be prev (ef8fe958), but was ef8ff128. (next=ef8ff128). ... Pid: 23062, comm: lockd Tainted: G W 2.6.30-rc2 #3 Call Trace: [] warn_slowpath+0x71/0xa0 [] ? update_curr+0x11d/0x125 [] ? trace_hardirqs_on_caller+0x18/0x150 [] ? trace_hardirqs_on+0xb/0xd [] ? _raw_spin_lock+0x53/0xfa [] __list_add+0x27/0x5c [] locks_start_grace+0x22/0x30 [lockd] [] set_grace_period+0x39/0x53 [lockd] [] ? lock_kernel+0x1c/0x28 [] lockd+0x64/0x164 [lockd] [] ? trace_hardirqs_on_caller+0x18/0x150 [] ? complete+0x34/0x3e [] ? lockd+0x0/0x164 [lockd] [] ? lockd+0x0/0x164 [lockd] [] kthread+0x45/0x6b [] ? kthread+0x0/0x6b [] kernel_thread_helper+0x7/0x10 Reported-by: Wang Chen Signed-off-by: J. Bruce Fields Cc: stable@kernel.org --- fs/lockd/svc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index abf83881f68..1a54ae14a19 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -104,6 +104,16 @@ static void set_grace_period(void) schedule_delayed_work(&grace_period_end, grace_period); } +static void restart_grace(void) +{ + if (nlmsvc_ops) { + cancel_delayed_work_sync(&grace_period_end); + locks_end_grace(&lockd_manager); + nlmsvc_invalidate_all(); + set_grace_period(); + } +} + /* * This is the lockd kernel thread */ @@ -149,10 +159,7 @@ lockd(void *vrqstp) if (signalled()) { flush_signals(current); - if (nlmsvc_ops) { - nlmsvc_invalidate_all(); - set_grace_period(); - } + restart_grace(); continue; } -- cgit v1.2.3 From 85c2a74fabadfc561b75fbd7decc6bcbfe873d57 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Apr 2009 23:38:46 +0900 Subject: nilfs2: fix possible recovery failure due to block creation without writer Some function calls in nilfs_prepare_segment_for_recovery() may fail because they can create blocks on meta data files without configuring a writable FS-instance. Concretely, nilfs_mdt_create_block() routine of meta data files will fail in that case. This fixes the problem by temporarily attaching a writable FS-instace during the function is called. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/recovery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 4fc081e47d7..57afa9d2406 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -407,6 +407,7 @@ void nilfs_dispose_segment_list(struct list_head *head) } static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, struct nilfs_recovery_info *ri) { struct list_head *head = &ri->ri_used_segments; @@ -421,6 +422,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, segnum[2] = ri->ri_segnum; segnum[3] = ri->ri_nextnum; + nilfs_attach_writer(nilfs, sbi); /* * Releasing the next segment of the latest super root. * The next segment is invalidated by this recovery. @@ -459,10 +461,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, nilfs->ns_pseg_offset = 0; nilfs->ns_seg_seq = ri->ri_seq + 2; nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0]; - return 0; failed: /* No need to recover sufile because it will be destroyed on error */ + nilfs_detach_writer(nilfs, sbi); return err; } @@ -728,7 +730,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { - err = nilfs_prepare_segment_for_recovery(nilfs, ri); + err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: Error preparing segments for " "recovery.\n"); -- cgit v1.2.3 From 201913ed746c7724a40d33ee5a0b6a1fd2ef3193 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 28 Apr 2009 21:04:59 +0900 Subject: nilfs2: fix circular locking dependency of writer mutex This fixes the following circular locking dependency problem: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3 #5 ------------------------------------------------------- segctord/3895 is trying to acquire lock: (&nilfs->ns_writer_mutex){+.+...}, at: [] nilfs_mdt_get_block+0x89/0x20f [nilfs2] but task is already holding lock: (&bmap->b_sem){++++..}, at: [] nilfs_bmap_propagate+0x14/0x2e [nilfs2] which lock already depends on the new lock. The bugfix is done by replacing call sites of nilfs_get_writer() which are never called from read-only context with direct dereferencing of pointer to a writable FS-instance. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 8 +++++--- fs/nilfs2/mdt.c | 13 +++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 108d281ebca..be387c6b2d4 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -516,14 +516,16 @@ static ssize_t nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); + struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; - if (unlikely(!sbi)) + if (unlikely(!sbi)) { + /* never happens because called for a writable mount */ + WARN_ON(1); return -EROFS; + } ret = nilfs_segctor_add_segments_to_be_freed( NILFS_SC(sbi), buf, nmembs); - nilfs_put_writer(nilfs); return (ret < 0) ? ret : nmembs; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 47dd815433f..e1c6777931b 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -77,19 +77,22 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, void *)) { struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; - struct nilfs_sb_info *writer = NULL; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; if (!sb) { - writer = nilfs_get_writer(nilfs); - if (!writer) { + /* + * Make sure this function is not called from any + * read-only context. + */ + if (!nilfs->ns_writer) { + WARN_ON(1); err = -EROFS; goto out; } - sb = writer->s_super; + sb = nilfs->ns_writer->s_super; } nilfs_transaction_begin(sb, &ti, 0); @@ -127,8 +130,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - if (writer) - nilfs_put_writer(nilfs); out: return err; } -- cgit v1.2.3 From 0c7a531a200480c7bc447260376973d830da9069 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 30 Apr 2009 14:52:58 +0100 Subject: GFS2: Fix glock ref counting bug Depending on the ordering of events as we go around the glock shrinker loop, it is possible to drop the ref count of a glock incorrectly. It doesn't happen very often. This patch corrects the got_ref variable, fixing the problem. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1afd9f26bcb..ff498109048 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1304,6 +1304,7 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) nr--; if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); + got_ref = 0; } spin_lock(&lru_lock); if (may_demote) -- cgit v1.2.3 From 67e55205ec55cc7899f1d783f217961596798419 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Fri, 24 Apr 2009 09:06:53 +0200 Subject: vfs: umount_begin BKL pushdown Push BKL down into ->umount_begin() Signed-off-by: Alessio Igor Bogani Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 6 +++++- fs/cifs/cifsfs.c | 3 +++ fs/fuse/inode.c | 3 +++ fs/namespace.c | 2 -- fs/nfs/super.c | 7 ++++++- 5 files changed, 17 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 5f8ab8adb5f..7d23214e559 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -230,9 +231,12 @@ static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) static void v9fs_umount_begin(struct super_block *sb) { - struct v9fs_session_info *v9ses = sb->s_fs_info; + struct v9fs_session_info *v9ses; + lock_kernel(); + v9ses = sb->s_fs_info; v9fs_session_cancel(v9ses); + unlock_kernel(); } static const struct super_operations v9fs_super_ops = { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0d6d8b57365..355e0efec0c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifspdu.h" #define DECLARE_GLOBALS_HERE @@ -530,6 +531,7 @@ static void cifs_umount_begin(struct super_block *sb) if (tcon == NULL) return; + lock_kernel(); read_lock(&cifs_tcp_ses_lock); if (tcon->tc_count == 1) tcon->tidStatus = CifsExiting; @@ -548,6 +550,7 @@ static void cifs_umount_begin(struct super_block *sb) } /* BB FIXME - finish add checks for tidStatus BB */ + unlock_kernel(); return; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 459b73dd45e..d1bc4d33ccb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -259,7 +260,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, static void fuse_umount_begin(struct super_block *sb) { + lock_kernel(); fuse_abort_conn(get_fuse_conn_super(sb)); + unlock_kernel(); } static void fuse_send_destroy(struct fuse_conn *fc) diff --git a/fs/namespace.c b/fs/namespace.c index 41196209a90..0d2003fb437 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1073,9 +1073,7 @@ static int do_umount(struct vfsmount *mnt, int flags) */ if (flags & MNT_FORCE && sb->s_op->umount_begin) { - lock_kernel(); sb->s_op->umount_begin(sb); - unlock_kernel(); } /* diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6717200923f..1679a164c8c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -683,9 +683,12 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) */ static void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); + struct nfs_server *server; struct rpc_clnt *rpc; + lock_kernel(); + + server = NFS_SB(sb); /* -EIO all pending I/O */ rpc = server->client_acl; if (!IS_ERR(rpc)) @@ -693,6 +696,8 @@ static void nfs_umount_begin(struct super_block *sb) rpc = server->client; if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); + + unlock_kernel(); } /* -- cgit v1.2.3 From 5a6059c358d3b3004cb6b8562e3b049b1f8564f9 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 1 May 2009 12:11:12 -0400 Subject: reiserfs: Expand i_mutex to enclose lookup_one_len 2.6.30-rc3 introduced some sanity checks in the VFS code to avoid NFS bugs by ensuring that lookup_one_len is always called under i_mutex. This patch expands the i_mutex locking to enclose lookup_one_len. This was always required, but not not enforced in the reiserfs code since it does locking around the xattr interactions with the xattr_sem. This is obvious enough, and it survived an overnight 50 thread ACL test. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f83f52bae39..31a3dbb120e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -120,25 +120,20 @@ static struct dentry *lookup_or_create_dir(struct dentry *parent, struct dentry *dentry; BUG_ON(!parent); + mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); dentry = lookup_one_len(name, parent, strlen(name)); - if (IS_ERR(dentry)) - return dentry; - else if (!dentry->d_inode) { + if (!IS_ERR(dentry) && !dentry->d_inode) { int err = -ENODATA; - if (xattr_may_create(flags)) { - mutex_lock_nested(&parent->d_inode->i_mutex, - I_MUTEX_XATTR); + if (xattr_may_create(flags)) err = xattr_mkdir(parent->d_inode, dentry, 0700); - mutex_unlock(&parent->d_inode->i_mutex); - } if (err) { dput(dentry); dentry = ERR_PTR(err); } } - + mutex_unlock(&parent->d_inode->i_mutex); return dentry; } @@ -184,6 +179,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, { struct reiserfs_dentry_buf *dbuf = buf; struct dentry *dentry; + WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; @@ -349,6 +345,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name, if (IS_ERR(xadir)) return ERR_CAST(xadir); + mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); xafile = lookup_one_len(name, xadir, strlen(name)); if (IS_ERR(xafile)) { err = PTR_ERR(xafile); @@ -360,18 +357,15 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name, if (!xafile->d_inode) { err = -ENODATA; - if (xattr_may_create(flags)) { - mutex_lock_nested(&xadir->d_inode->i_mutex, - I_MUTEX_XATTR); + if (xattr_may_create(flags)) err = xattr_create(xadir->d_inode, xafile, 0700|S_IFREG); - mutex_unlock(&xadir->d_inode->i_mutex); - } } if (err) dput(xafile); out: + mutex_unlock(&xadir->d_inode->i_mutex); dput(xadir); if (err) return ERR_PTR(err); @@ -435,6 +429,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name) if (IS_ERR(xadir)) return PTR_ERR(xadir); + mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); dentry = lookup_one_len(name, xadir, strlen(name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -442,14 +437,13 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name) } if (dentry->d_inode) { - mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR); err = xattr_unlink(xadir->d_inode, dentry); - mutex_unlock(&xadir->d_inode->i_mutex); update_ctime(inode); } dput(dentry); out_dput: + mutex_unlock(&xadir->d_inode->i_mutex); dput(xadir); return err; } @@ -906,9 +900,9 @@ static int create_privroot(struct dentry *dentry) { int err; struct inode *inode = dentry->d_parent->d_inode; - mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); + err = xattr_mkdir(inode, dentry, 0700); - mutex_unlock(&inode->i_mutex); if (err) { dput(dentry); dentry = NULL; @@ -980,6 +974,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) /* If we don't have the privroot located yet - go find it */ if (!REISERFS_SB(s)->priv_root) { struct dentry *dentry; + mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { @@ -993,6 +988,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) } } else err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); if (!err && dentry) { s->s_root->d_op = &xattr_lookup_poison_ops; -- cgit v1.2.3 From edcc37a0478836b4a51eafb1bcec6a52708f681d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 May 2009 06:00:05 -0400 Subject: Always lookup priv_root on reiserfs mount and keep it ... even if it's a negative dentry. That way we can set ->d_op on root before anyone could race with us. Simplify d_compare(), while we are at it. Signed-off-by: Al Viro --- fs/reiserfs/super.c | 6 ++-- fs/reiserfs/xattr.c | 86 ++++++++++++++++++++++------------------------------- 2 files changed, 40 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0ae6486d904..d444fe0013a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1842,7 +1842,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error; } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; @@ -1855,7 +1856,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_info(s, "using 3.5.x disk format\n"); } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 31a3dbb120e..2891f789f54 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -903,16 +903,19 @@ static int create_privroot(struct dentry *dentry) WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); err = xattr_mkdir(inode, dentry, 0700); - if (err) { - dput(dentry); - dentry = NULL; + if (err || !dentry->d_inode) { + reiserfs_warning(dentry->d_sb, "jdm-20006", + "xattrs/ACLs enabled and couldn't " + "find/create .reiserfs_priv. " + "Failing mount."); + return -EOPNOTSUPP; } - if (dentry && dentry->d_inode) - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); + dentry->d_inode->i_flags |= S_PRIVATE; + reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " + "storage.\n", PRIVROOT_NAME); - return err; + return 0; } static int xattr_mount_check(struct super_block *s) @@ -944,11 +947,9 @@ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) { struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (name->len == priv_root->d_name.len && - name->hash == priv_root->d_name.hash && - !memcmp(name->name, priv_root->d_name.name, name->len)) { + if (container_of(q1, struct dentry, d_name) == priv_root) return -ENOENT; - } else if (q1->len == name->len && + if (q1->len == name->len && !memcmp(q1->name, name->name, name->len)) return 0; return 1; @@ -958,6 +959,27 @@ static const struct dentry_operations xattr_lookup_poison_ops = { .d_compare = xattr_lookup_poison, }; +int reiserfs_lookup_privroot(struct super_block *s) +{ + struct dentry *dentry; + int err = 0; + + /* If we don't have the privroot located yet - go find it */ + mutex_lock(&s->s_root->d_inode->i_mutex); + dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, + strlen(PRIVROOT_NAME)); + if (!IS_ERR(dentry)) { + REISERFS_SB(s)->priv_root = dentry; + s->s_root->d_op = &xattr_lookup_poison_ops; + if (dentry->d_inode) + dentry->d_inode->i_flags |= S_PRIVATE; + } else + err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); + + return err; +} + /* We need to take a copy of the mount flags since things like * MS_RDONLY don't get set until *after* we're called. * mount_flags != mount_options */ @@ -969,48 +991,12 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) err = xattr_mount_check(s); if (err) goto error; -#endif - /* If we don't have the privroot located yet - go find it */ - if (!REISERFS_SB(s)->priv_root) { - struct dentry *dentry; - mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { -#ifdef CONFIG_REISERFS_FS_XATTR - if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) - err = create_privroot(dentry); -#endif - if (!dentry->d_inode) { - dput(dentry); - dentry = NULL; - } - } else - err = PTR_ERR(dentry); + if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + mutex_lock(&s->s_root->d_inode->i_mutex); + err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); - - if (!err && dentry) { - s->s_root->d_op = &xattr_lookup_poison_ops; - dentry->d_inode->i_flags |= S_PRIVATE; - REISERFS_SB(s)->priv_root = dentry; -#ifdef CONFIG_REISERFS_FS_XATTR - /* xattrs are unavailable */ - } else if (!(mount_flags & MS_RDONLY)) { - /* If we're read-only it just means that the dir - * hasn't been created. Not an error -- just no - * xattrs on the fs. We'll check again if we - * go read-write */ - reiserfs_warning(s, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. " - "Failing mount."); - err = -EOPNOTSUPP; -#endif - } } - -#ifdef CONFIG_REISERFS_FS_XATTR if (!err) s->s_xattr = reiserfs_xattr_handlers; -- cgit v1.2.3 From ab17c4f02156c4f75d7fa43a5aa2a7f942d47201 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:15 -0400 Subject: reiserfs: fixup xattr_root caching The xattr_root caching was broken from my previous patch set. It wouldn't cause corruption, but could cause decreased performance due to allocating a larger chunk of the journal (~ 27 blocks) than it would actually use. This patch loads the xattr root dentry at xattr initialization and creates it on-demand. Since we're using the cached dentry, there's no point in keeping lookup_or_create_dir around, so that's removed. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 73 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2891f789f54..c77984473db 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -113,36 +113,28 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) #define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) -/* Returns and possibly creates the xattr dir. */ -static struct dentry *lookup_or_create_dir(struct dentry *parent, - const char *name, int flags) +static struct dentry *open_xa_root(struct super_block *sb, int flags) { - struct dentry *dentry; - BUG_ON(!parent); + struct dentry *privroot = REISERFS_SB(sb)->priv_root; + struct dentry *xaroot; + if (!privroot->d_inode) + return ERR_PTR(-ENODATA); - mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); - dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && !dentry->d_inode) { - int err = -ENODATA; + mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); + xaroot = dget(REISERFS_SB(sb)->xattr_root); + if (!xaroot->d_inode) { + int err = -ENODATA; if (xattr_may_create(flags)) - err = xattr_mkdir(parent->d_inode, dentry, 0700); - + err = xattr_mkdir(privroot->d_inode, xaroot, 0700); if (err) { - dput(dentry); - dentry = ERR_PTR(err); + dput(xaroot); + xaroot = ERR_PTR(err); } } - mutex_unlock(&parent->d_inode->i_mutex); - return dentry; -} -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - if (!privroot) - return ERR_PTR(-ENODATA); - return lookup_or_create_dir(privroot, XAROOT_NAME, flags); + mutex_unlock(&privroot->d_inode->i_mutex); + return xaroot; } static struct dentry *open_xa_dir(const struct inode *inode, int flags) @@ -158,10 +150,22 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); - xadir = lookup_or_create_dir(xaroot, namebuf, flags); + mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR); + + xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); + if (!IS_ERR(xadir) && !xadir->d_inode) { + int err = -ENODATA; + if (xattr_may_create(flags)) + err = xattr_mkdir(xaroot->d_inode, xadir, 0700); + if (err) { + dput(xadir); + xadir = ERR_PTR(err); + } + } + + mutex_unlock(&xaroot->d_inode->i_mutex); dput(xaroot); return xadir; - } /* The following are side effects of other operations that aren't explicitly @@ -986,19 +990,33 @@ int reiserfs_lookup_privroot(struct super_block *s) int reiserfs_xattr_init(struct super_block *s, int mount_flags) { int err = 0; + struct dentry *privroot = REISERFS_SB(s)->priv_root; #ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; - if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { mutex_lock(&s->s_root->d_inode->i_mutex); err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); } - if (!err) + + if (privroot->d_inode) { s->s_xattr = reiserfs_xattr_handlers; + mutex_lock(&privroot->d_inode->i_mutex); + if (!REISERFS_SB(s)->xattr_root) { + struct dentry *dentry; + dentry = lookup_one_len(XAROOT_NAME, privroot, + strlen(XAROOT_NAME)); + if (!IS_ERR(dentry)) + REISERFS_SB(s)->xattr_root = dentry; + else + err = PTR_ERR(dentry); + } + mutex_unlock(&privroot->d_inode->i_mutex); + } error: if (err) { @@ -1008,11 +1026,12 @@ error: #endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ - s->s_flags = s->s_flags & ~MS_POSIXACL; #ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; + else #endif + s->s_flags &= ~MS_POSIXACL; return err; } -- cgit v1.2.3 From b82bb72ba7df473461c5e2368a4e7497c8ce76e9 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:16 -0400 Subject: reiserfs: dont associate security.* with xattr files The security.* xattrs are ignored for xattr files, so don't create them. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr_security.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 4d3c20e787c..a92c8792c0f 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -55,8 +55,16 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, struct reiserfs_security_handle *sec) { int blocks = 0; - int error = security_inode_init_security(inode, dir, &sec->name, - &sec->value, &sec->length); + int error; + + sec->name = NULL; + + /* Don't add selinux attributes on xattrs - they'll never get used */ + if (IS_PRIVATE(dir)) + return 0; + + error = security_inode_init_security(inode, dir, &sec->name, + &sec->value, &sec->length); if (error) { if (error == -EOPNOTSUPP) error = 0; -- cgit v1.2.3 From 677c9b2e393a0cd203bd54e9c18b012b2c73305a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:17 -0400 Subject: reiserfs: remove privroot hiding in lookup With Al Viro's patch to move privroot lookup to fs mount, there's no need to have special code to hide the privroot in reiserfs_lookup. I've also cleaned up the privroot hiding in reiserfs_readdir_dentry and removed the last user of reiserfs_xattrs(). Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/dir.c | 24 +++++++++++++----------- fs/reiserfs/namei.c | 17 ++--------------- fs/reiserfs/xattr.c | 2 +- 3 files changed, 16 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 67a80d7e59e..45ee3d357c7 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -41,6 +41,18 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, #define store_ih(where,what) copy_item_head (where, what) +static inline bool is_privroot_deh(struct dentry *dir, + struct reiserfs_de_head *deh) +{ + int ret = 0; +#ifdef CONFIG_REISERFS_FS_XATTR + struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; + ret = (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); +#endif + return ret; +} + int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, filldir_t filldir, loff_t *pos) { @@ -138,18 +150,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, } /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs(inode->i_sb) && - !old_format_only(inode->i_sb) && - dentry == inode->i_sb->s_root && - REISERFS_SB(inode->i_sb)->priv_root && - REISERFS_SB(inode->i_sb)->priv_root->d_inode - && deh_objectid(deh) == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(inode->i_sb)-> - priv_root->d_inode)-> - k_objectid)) { + if (is_privroot_deh(dentry, deh)) continue; - } d_off = deh_offset(deh); *pos = d_off; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index efd4d720718..27157912863 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -338,21 +338,8 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, &path_to_entry, &de); pathrelse(&path_to_entry); if (retval == NAME_FOUND) { - /* Hide the .reiserfs_priv directory */ - if (reiserfs_xattrs(dir->i_sb) && - !old_format_only(dir->i_sb) && - REISERFS_SB(dir->i_sb)->priv_root && - REISERFS_SB(dir->i_sb)->priv_root->d_inode && - de.de_objectid == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(dir->i_sb)->priv_root->d_inode)-> - k_objectid)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - inode = - reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + inode = reiserfs_iget(dir->i_sb, + (struct cpu_key *)&(de.de_dir_id)); if (!inode || IS_ERR(inode)) { reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c77984473db..2237e10c7c7 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -841,7 +841,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (!dentry->d_inode) return -EINVAL; - if (!reiserfs_xattrs(dentry->d_sb) || + if (!dentry->d_sb->s_xattr || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; -- cgit v1.2.3 From 74dbbdd7fdc11763f4698d2f3e684cf4446951e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:07:50 -0400 Subject: New helper: deactivate_locked_super() Does equivalent of up_write(&s->s_umount); deactivate_super(s); However, it does not does not unlock it until it's all over. As the result, it's safe to use to dispose of new superblock on ->get_sb() failure exits - nobody will see the sucker until it's all over. Equivalent using up_write/deactivate_super is safe for that purpose if superblock is either safe to use or has NULL ->s_root when we unlock. Normally filesystems take the required precautions, but a) we do have bugs in that area in some of them. b) up_write/deactivate_super sequence is extremely common, so the helper makes sense anyway. Signed-off-by: Al Viro --- fs/super.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 786fe7d7279..a9dc4c33ef4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -207,6 +207,34 @@ void deactivate_super(struct super_block *s) EXPORT_SYMBOL(deactivate_super); +/** + * deactivate_locked_super - drop an active reference to superblock + * @s: superblock to deactivate + * + * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that + * it does not unlock it until it's all over. As the result, it's safe to + * use to dispose of new superblock on ->get_sb() failure exits - nobody + * will see the sucker until it's all over. Equivalent using up_write + + * deactivate_super is safe for that purpose only if superblock is either + * safe to use or has NULL ->s_root when we unlock. + */ +void deactivate_locked_super(struct super_block *s) +{ + struct file_system_type *fs = s->s_type; + if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { + s->s_count -= S_BIAS-1; + spin_unlock(&sb_lock); + vfs_dq_off(s, 0); + fs->kill_sb(s); + put_filesystem(fs); + put_super(s); + } else { + up_write(&s->s_umount); + } +} + +EXPORT_SYMBOL(deactivate_locked_super); + /** * grab_super - acquire an active reference * @s: reference we are trying to make active @@ -797,8 +825,7 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } @@ -854,8 +881,7 @@ int get_sb_bdev(struct file_system_type *fs_type, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_bdev; } @@ -870,8 +896,7 @@ int get_sb_bdev(struct file_system_type *fs_type, sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error; } @@ -921,8 +946,7 @@ int get_sb_nodev(struct file_system_type *fs_type, error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -952,8 +976,7 @@ int get_sb_single(struct file_system_type *fs_type, s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -1006,8 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void return mnt; out_sb: dput(mnt->mnt_root); - up_write(&mnt->mnt_sb->s_umount); - deactivate_super(mnt->mnt_sb); + deactivate_locked_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: -- cgit v1.2.3 From 6f5bbff9a1b7d6864a495763448a363bbfa96324 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:34:22 -0400 Subject: Convert obvious places to deactivate_locked_super() Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 5 +---- fs/afs/super.c | 3 +-- fs/btrfs/super.c | 12 ++++-------- fs/cifs/cifsfs.c | 3 +-- fs/devpts/inode.c | 5 ++--- fs/ecryptfs/main.c | 5 ++--- fs/libfs.c | 3 +-- fs/nfs/super.c | 15 +++++---------- fs/proc/root.c | 3 +-- fs/ubifs/super.c | 3 +-- 10 files changed, 19 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 7d23214e559..0d29a57c63e 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -174,10 +174,7 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); return 0; release_sb: - if (sb) { - up_write(&sb->s_umount); - deactivate_super(sb); - } + deactivate_locked_super(sb); free_stat: kfree(st); diff --git a/fs/afs/super.c b/fs/afs/super.c index aee239a048c..2753f16dd31 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -405,8 +405,7 @@ static int afs_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; ret = afs_fill_super(sb, ¶ms); if (ret < 0) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); goto error; } sb->s_options = new_opts; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3536bdb2d7c..6dfae5b28f5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -502,8 +502,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_close_devices; } @@ -517,8 +516,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error_free_subvol_name; } @@ -535,15 +533,13 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, mutex_unlock(&s->s_root->d_inode->i_mutex); if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = PTR_ERR(root); goto error_free_subvol_name; } if (!root->d_inode) { dput(root); - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -ENXIO; goto error_free_subvol_name; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 355e0efec0c..5e6d35804d7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -602,8 +602,7 @@ cifs_get_sb(struct file_system_type *fs_type, rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0); if (rc) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return rc; } sb->s_flags |= MS_ACTIVE; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 63a4a59e414..21165cf934f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -389,11 +389,10 @@ static int devpts_get_sb(struct file_system_type *fs_type, return 0; out_dput: - dput(s->s_root); + dput(s->s_root); /* undo dget() in simple_set_mnt() */ out_undo_sget: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index ccabd5faa04..9f0aa9883c2 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -614,9 +614,8 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, } goto out; out_abort: - dput(sb->s_root); - up_write(&sb->s_umount); - deactivate_super(sb); + dput(sb->s_root); /* aka mnt->mnt_root, as set by get_sb_nodev() */ + deactivate_locked_super(sb); out: return rc; } diff --git a/fs/libfs.c b/fs/libfs.c index cd223190c4e..80046ddf506 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -246,8 +246,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, return 0; Enomem: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return -ENOMEM; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1679a164c8c..d2d67781c57 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2111,8 +2111,7 @@ out_err_nosb: error_splat_root: dput(mntroot); error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto out; } @@ -2208,8 +2207,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error); return error; } @@ -2469,8 +2467,7 @@ out_free: error_splat_root: dput(mntroot); error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto out; } @@ -2564,8 +2561,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error); return error; } @@ -2649,8 +2645,7 @@ out_err_noserver: return error; error_splat_super: - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); return error; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 1e15a2b176e..b080b791d9e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -67,8 +67,7 @@ static int proc_get_sb(struct file_system_type *fs_type, sb->s_flags = flags; err = proc_fill_super(sb); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index faa44f90608..e9f7a754c4f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2055,8 +2055,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, return 0; out_deact: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); out_close: ubi_close_volume(ubi); return err; -- cgit v1.2.3 From c96f58573778ddf96ff67108a635f3f642ea63d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:35:04 -0400 Subject: Fix a leak in failure exit in 9p ->get_sb() Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 0d29a57c63e..ab5547ff29a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -156,6 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { + iput(inode); retval = -ENOMEM; goto release_sb; } -- cgit v1.2.3 From a731d12d6ddd1e703770cacb5dfecb155b03ee06 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 6 Apr 2009 16:43:42 -0700 Subject: ocfs2: Use nd_set_link(). ocfs2 was hand-calling vfs_follow_link(), but there's no point to that. Let's use page_follow_link_light() and nd_set_link(). Signed-off-by: Joel Becker Signed-off-by: Al Viro --- fs/ocfs2/symlink.c | 77 ++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index ed0a0cfd68d..579dd1b1110 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -39,6 +39,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_NAMEI #include @@ -54,26 +55,6 @@ #include "buffer_head_io.h" -static char *ocfs2_page_getlink(struct dentry * dentry, - struct page **ppage); -static char *ocfs2_fast_symlink_getlink(struct inode *inode, - struct buffer_head **bh); - -/* get the link contents into pagecache */ -static char *ocfs2_page_getlink(struct dentry * dentry, - struct page **ppage) -{ - struct page * page; - struct address_space *mapping = dentry->d_inode->i_mapping; - page = read_mapping_page(mapping, 0, NULL); - if (IS_ERR(page)) - goto sync_fail; - *ppage = page; - return kmap(page); - -sync_fail: - return (char*)page; -} static char *ocfs2_fast_symlink_getlink(struct inode *inode, struct buffer_head **bh) @@ -128,40 +109,55 @@ out: return ret; } -static void *ocfs2_follow_link(struct dentry *dentry, - struct nameidata *nd) +static void *ocfs2_fast_follow_link(struct dentry *dentry, + struct nameidata *nd) { - int status; - char *link; + int status = 0; + int len; + char *target, *link = ERR_PTR(-ENOMEM); struct inode *inode = dentry->d_inode; - struct page *page = NULL; struct buffer_head *bh = NULL; - - if (ocfs2_inode_is_fast_symlink(inode)) - link = ocfs2_fast_symlink_getlink(inode, &bh); - else - link = ocfs2_page_getlink(dentry, &page); - if (IS_ERR(link)) { - status = PTR_ERR(link); + + mlog_entry_void(); + + BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); + target = ocfs2_fast_symlink_getlink(inode, &bh); + if (IS_ERR(target)) { + status = PTR_ERR(target); mlog_errno(status); goto bail; } - status = vfs_follow_link(nd, link); + /* Fast symlinks can't be large */ + len = strlen(target); + link = kzalloc(len + 1, GFP_NOFS); + if (!link) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + memcpy(link, target, len); + nd_set_link(nd, link); bail: - if (page) { - kunmap(page); - page_cache_release(page); - } brelse(bh); - return ERR_PTR(status); + mlog_exit(status); + return status ? ERR_PTR(status) : link; +} + +static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +{ + char *link = cookie; + + kfree(link); } const struct inode_operations ocfs2_symlink_inode_operations = { .readlink = page_readlink, - .follow_link = ocfs2_follow_link, + .follow_link = page_follow_link_light, + .put_link = page_put_link, .getattr = ocfs2_getattr, .setattr = ocfs2_setattr, .setxattr = generic_setxattr, @@ -171,7 +167,8 @@ const struct inode_operations ocfs2_symlink_inode_operations = { }; const struct inode_operations ocfs2_fast_symlink_inode_operations = { .readlink = ocfs2_readlink, - .follow_link = ocfs2_follow_link, + .follow_link = ocfs2_fast_follow_link, + .put_link = ocfs2_fast_put_link, .getattr = ocfs2_getattr, .setattr = ocfs2_setattr, .setxattr = generic_setxattr, -- cgit v1.2.3 From c490d79bb70c549e096099576b1df40a8810b0d8 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:53 +1000 Subject: fs: dcache fix LRU ordering Fix ordering of LRU when moving referenced dentries to the head of the list (they should go to the head of the list in the same order as they were found from the tail, rather than reverse order). Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 1fcffebfb44..75659a6fd1f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -481,7 +481,7 @@ restart: if ((flags & DCACHE_REFERENCED) && (dentry->d_flags & DCACHE_REFERENCED)) { dentry->d_flags &= ~DCACHE_REFERENCED; - list_move_tail(&dentry->d_lru, &referenced); + list_move(&dentry->d_lru, &referenced); spin_unlock(&dentry->d_lock); } else { list_move_tail(&dentry->d_lru, &tmp); -- cgit v1.2.3 From 774e33e70b2bffa8c602d22a5d27c0061a0039cc Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 26 Apr 2009 14:51:17 +0200 Subject: ROMFS: romfs_dev_read() error ignored romfs_dev_read() may return -EIO, but ret is unsigned, so the errorpath isn't taken. Signed-off-by: Roel Kluin Signed-off-by: Al Viro --- fs/romfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/romfs/super.c b/fs/romfs/super.c index c53b5ef8a02..4ab3c03d8f9 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -298,7 +298,8 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) struct romfs_inode ri; struct inode *i; unsigned long nlen; - unsigned nextfh, ret; + unsigned nextfh; + int ret; umode_t mode; /* we might have to traverse a chain of "hard link" file entries to get -- cgit v1.2.3 From ddbaaf3024d764ced700efb2d818709b90ea6fdd Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 29 Apr 2009 20:14:57 -0400 Subject: NULL noise in fs/super.c:kill_bdev_super() Signed-off-by: H Hartley Sweeten Cc: Subrata Modak Signed-off-by: Al Viro --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index a9dc4c33ef4..1943fdf655f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -922,7 +922,7 @@ void kill_block_super(struct super_block *sb) struct block_device *bdev = sb->s_bdev; fmode_t mode = sb->s_mode; - bdev->bd_super = 0; + bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); close_bdev_exclusive(bdev, mode); -- cgit v1.2.3 From 6b3304b531704711286c3359b06922b83fdba015 Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Tue, 31 Mar 2009 19:35:54 +0530 Subject: Make checkpatch.pl shut up on fs/inode.c Code Quality According To Mingo(tm) has been vastly improved, no code has been damaged^Wchanged^Wdamaged. [commit message rewritten -- AV] Signed-off-by: Manish Katiyar Signed-off-by: Al Viro --- fs/inode.c | 81 +++++++++++++++++++++++++++----------------------------------- 1 file changed, 35 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 6ad14a1cd8c..0571983755d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -99,7 +99,7 @@ static DEFINE_MUTEX(iprune_mutex); */ struct inodes_stat_t inodes_stat; -static struct kmem_cache * inode_cachep __read_mostly; +static struct kmem_cache *inode_cachep __read_mostly; static void wake_up_inode(struct inode *inode) { @@ -124,7 +124,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct address_space * const mapping = &inode->i_data; + struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; @@ -216,7 +216,7 @@ static struct inode *alloc_inode(struct super_block *sb) return NULL; } -void destroy_inode(struct inode *inode) +void destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); @@ -252,12 +252,11 @@ void inode_init_once(struct inode *inode) mutex_init(&inode->inotify_mutex); #endif } - EXPORT_SYMBOL(inode_init_once); static void init_once(void *foo) { - struct inode * inode = (struct inode *) foo; + struct inode *inode = (struct inode *) foo; inode_init_once(inode); } @@ -265,7 +264,7 @@ static void init_once(void *foo) /* * inode_lock must be held */ -void __iget(struct inode * inode) +void __iget(struct inode *inode) { if (atomic_read(&inode->i_count)) { atomic_inc(&inode->i_count); @@ -289,7 +288,7 @@ void clear_inode(struct inode *inode) { might_sleep(); invalidate_inode_buffers(inode); - + BUG_ON(inode->i_data.nrpages); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); @@ -303,7 +302,6 @@ void clear_inode(struct inode *inode) cd_forget(inode); inode->i_state = I_CLEAR; } - EXPORT_SYMBOL(clear_inode); /* @@ -351,8 +349,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) next = head->next; for (;;) { - struct list_head * tmp = next; - struct inode * inode; + struct list_head *tmp = next; + struct inode *inode; /* * We can reschedule here without worrying about the list's @@ -391,7 +389,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) * fails because there are busy inodes then a non zero value is returned. * If the discard is successful all the inodes have been discarded. */ -int invalidate_inodes(struct super_block * sb) +int invalidate_inodes(struct super_block *sb) { int busy; LIST_HEAD(throw_away); @@ -407,7 +405,6 @@ int invalidate_inodes(struct super_block * sb) return busy; } - EXPORT_SYMBOL(invalidate_inodes); static int can_unuse(struct inode *inode) @@ -504,7 +501,7 @@ static int shrink_icache_memory(int nr, gfp_t gfp_mask) * Nasty deadlock avoidance. We may hold various FS locks, * and we don't want to recurse into the FS that called us * in clear_inode() and friends.. - */ + */ if (!(gfp_mask & __GFP_FS)) return -1; prune_icache(nr); @@ -524,10 +521,13 @@ static void __wait_on_freeing_inode(struct inode *inode); * by hand after calling find_inode now! This simplifies iunique and won't * add any additional branch in the common code. */ -static struct inode * find_inode(struct super_block * sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data) +static struct inode *find_inode(struct super_block *sb, + struct hlist_head *head, + int (*test)(struct inode *, void *), + void *data) { struct hlist_node *node; - struct inode * inode = NULL; + struct inode *inode = NULL; repeat: hlist_for_each_entry(inode, node, head, i_hash) { @@ -548,10 +548,11 @@ repeat: * find_inode_fast is the fast path version of find_inode, see the comment at * iget_locked for details. */ -static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head *head, unsigned long ino) +static struct inode *find_inode_fast(struct super_block *sb, + struct hlist_head *head, unsigned long ino) { struct hlist_node *node; - struct inode * inode = NULL; + struct inode *inode = NULL; repeat: hlist_for_each_entry(inode, node, head, i_hash) { @@ -631,10 +632,10 @@ struct inode *new_inode(struct super_block *sb) * here to attempt to avoid that. */ static unsigned int last_ino; - struct inode * inode; + struct inode *inode; spin_lock_prefetch(&inode_lock); - + inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); @@ -645,7 +646,6 @@ struct inode *new_inode(struct super_block *sb) } return inode; } - EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) @@ -674,7 +674,6 @@ void unlock_new_inode(struct inode *inode) inode->i_state &= ~(I_LOCK|I_NEW); wake_up_inode(inode); } - EXPORT_SYMBOL(unlock_new_inode); /* @@ -683,13 +682,17 @@ EXPORT_SYMBOL(unlock_new_inode); * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org */ -static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) +static struct inode *get_new_inode(struct super_block *sb, + struct hlist_head *head, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), + void *data) { - struct inode * inode; + struct inode *inode; inode = alloc_inode(sb); if (inode) { - struct inode * old; + struct inode *old; spin_lock(&inode_lock); /* We released the lock, so.. */ @@ -731,13 +734,14 @@ set_failed: * get_new_inode_fast is the fast path version of get_new_inode, see the * comment at iget_locked for details. */ -static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_head *head, unsigned long ino) +static struct inode *get_new_inode_fast(struct super_block *sb, + struct hlist_head *head, unsigned long ino) { - struct inode * inode; + struct inode *inode; inode = alloc_inode(sb); if (inode) { - struct inode * old; + struct inode *old; spin_lock(&inode_lock); /* We released the lock, so.. */ @@ -823,7 +827,6 @@ struct inode *igrab(struct inode *inode) spin_unlock(&inode_lock); return inode; } - EXPORT_SYMBOL(igrab); /** @@ -924,7 +927,6 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, return ifind(sb, head, test, data, 0); } - EXPORT_SYMBOL(ilookup5_nowait); /** @@ -953,7 +955,6 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, return ifind(sb, head, test, data, 1); } - EXPORT_SYMBOL(ilookup5); /** @@ -976,7 +977,6 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) return ifind_fast(sb, head, ino); } - EXPORT_SYMBOL(ilookup); /** @@ -1015,7 +1015,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, */ return get_new_inode(sb, head, test, set, data); } - EXPORT_SYMBOL(iget5_locked); /** @@ -1047,7 +1046,6 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) */ return get_new_inode_fast(sb, head, ino); } - EXPORT_SYMBOL(iget_locked); int insert_inode_locked(struct inode *inode) @@ -1076,7 +1074,6 @@ int insert_inode_locked(struct inode *inode) iput(old); } } - EXPORT_SYMBOL(insert_inode_locked); int insert_inode_locked4(struct inode *inode, unsigned long hashval, @@ -1106,7 +1103,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, iput(old); } } - EXPORT_SYMBOL(insert_inode_locked4); /** @@ -1124,7 +1120,6 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); } - EXPORT_SYMBOL(__insert_inode_hash); /** @@ -1139,7 +1134,6 @@ void remove_inode_hash(struct inode *inode) hlist_del_init(&inode->i_hash); spin_unlock(&inode_lock); } - EXPORT_SYMBOL(remove_inode_hash); /* @@ -1187,7 +1181,6 @@ void generic_delete_inode(struct inode *inode) BUG_ON(inode->i_state != I_CLEAR); destroy_inode(inode); } - EXPORT_SYMBOL(generic_delete_inode); static void generic_forget_inode(struct inode *inode) @@ -1237,12 +1230,11 @@ void generic_drop_inode(struct inode *inode) else generic_forget_inode(inode); } - EXPORT_SYMBOL_GPL(generic_drop_inode); /* * Called when we're dropping the last reference - * to an inode. + * to an inode. * * Call the FS "drop()" function, defaulting to * the legacy UNIX filesystem behaviour.. @@ -1262,7 +1254,7 @@ static inline void iput_final(struct inode *inode) } /** - * iput - put an inode + * iput - put an inode * @inode: inode to put * * Puts an inode, dropping its usage count. If the inode use count hits @@ -1279,7 +1271,6 @@ void iput(struct inode *inode) iput_final(inode); } } - EXPORT_SYMBOL(iput); /** @@ -1290,10 +1281,10 @@ EXPORT_SYMBOL(iput); * Returns the block number on the device holding the inode that * is the disk block number for the block of the file requested. * That is, asked for block 4 of inode 1 the function will return the - * disk block relative to the disk start that holds that block of the + * disk block relative to the disk start that holds that block of the * file. */ -sector_t bmap(struct inode * inode, sector_t block) +sector_t bmap(struct inode *inode, sector_t block) { sector_t res = 0; if (inode->i_mapping->a_ops->bmap) @@ -1425,7 +1416,6 @@ void file_update_time(struct file *file) mark_inode_dirty_sync(inode); mnt_drop_write(file->f_path.mnt); } - EXPORT_SYMBOL(file_update_time); int inode_needs_sync(struct inode *inode) @@ -1436,7 +1426,6 @@ int inode_needs_sync(struct inode *inode) return 1; return 0; } - EXPORT_SYMBOL(inode_needs_sync); int inode_wait(void *word) -- cgit v1.2.3 From e24977d45f45d1675e050dc1a0aaf4bfc4ca9866 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 2 Apr 2009 21:17:03 -0400 Subject: Reduce path_lookup() abuses ... use kern_path() where possible [folded a fix from rdd] Signed-off-by: Al Viro --- fs/gfs2/ops_fstype.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 650a730707b..1ff9473ea75 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1282,21 +1282,21 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, static struct super_block *get_gfs2_sb(const char *dev_name) { struct super_block *sb; - struct nameidata nd; + struct path path; int error; - error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); + error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", dev_name, error); return NULL; } - sb = nd.path.dentry->d_inode->i_sb; + sb = path.dentry->d_inode->i_sb; if (sb && (sb->s_type == &gfs2_fs_type)) atomic_inc(&sb->s_active); else sb = NULL; - path_put(&nd.path); + path_put(&path); return sb; } -- cgit v1.2.3 From a44ddbb6d8a8ffe4e34e417048dfdd8f3dd1de4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 09:38:49 -0400 Subject: Make open_exec() and sys_uselib() use may_open(), instead of duplicating its parts Signed-off-by: Al Viro --- fs/exec.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 639177b0eea..41ae8e0de72 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,11 +126,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = inode_permission(nd.path.dentry->d_inode, - MAY_READ | MAY_EXEC | MAY_OPEN); - if (error) - goto exit; - error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN); + error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); if (error) goto exit; @@ -677,10 +673,7 @@ struct file *open_exec(const char *name) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_put; - err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); - if (err) - goto out_path_put; - err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN); + err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); if (err) goto out_path_put; -- cgit v1.2.3 From 6e8341a11eb21826b7192d0bb88cb5b44900a9af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 11:16:22 -0400 Subject: Switch open_exec() and sys_uselib() to do_open_filp() ... and make path_lookup_open() static Signed-off-by: Al Viro --- fs/exec.c | 72 ++++++++++++++++++++++---------------------------------------- fs/namei.c | 13 ++++++------ fs/open.c | 2 +- 3 files changed, 33 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 41ae8e0de72..895823d0149 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -105,36 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto exit; - - error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); - if (error) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -156,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -657,44 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if (err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4..967c3db9272 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) +static int path_lookup_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1637,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode) * open_to_namei_flags() for more details. */ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode) + int open_flag, int mode, int acc_mode) { struct file *filp; struct nameidata nd; - int acc_mode, error; + int error; struct path path; struct dentry *dir; int count = 0; int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = MAY_OPEN | ACC_MODE(flag); + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -1869,7 +1870,7 @@ do_link: */ struct file *filp_open(const char *filename, int flags, int mode) { - return do_filp_open(AT_FDCWD, filename, flags, mode); + return do_filp_open(AT_FDCWD, filename, flags, mode, 0); } EXPORT_SYMBOL(filp_open); diff --git a/fs/open.c b/fs/open.c index 377eb25b6ab..bdfbf03615a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); -- cgit v1.2.3 From f9dbd05bc97d1d4f17c2057612f6a8e4dbd039e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 19:56:21 -0400 Subject: switch ufs directories to ufs_sync_file() Signed-off-by: Al Viro --- fs/ufs/dir.c | 2 +- fs/ufs/file.c | 2 +- fs/ufs/ufs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index dbbbc466876..6321b797061 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -666,6 +666,6 @@ not_empty: const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, - .fsync = file_fsync, + .fsync = ufs_sync_file, .llseek = generic_file_llseek, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 625ef17c6f8..2bd3a161571 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -30,7 +30,7 @@ #include "ufs.h" -static int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) +int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int err; diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 69b3427d788..d0c4acd4f1f 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -98,8 +98,8 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, /* file.c */ extern const struct inode_operations ufs_file_inode_operations; extern const struct file_operations ufs_file_operations; - extern const struct address_space_operations ufs_aops; +extern int ufs_sync_file(struct file *, struct dentry *, int); /* ialloc.c */ extern void ufs_free_inode (struct inode *inode); -- cgit v1.2.3 From 2a32cebd6cbcc43996c3e2d114fa32ba1e71192a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 May 2009 16:05:57 -0400 Subject: Fix races around the access to ->s_options Put generic_show_options read access to s_options under rcu_read_lock, split save_mount_options() into "we are setting it the first time" (uses in foo_fill_super()) and "we are relacing and freeing the old one", synchronize_rcu() before kfree() in the latter. Signed-off-by: Al Viro --- fs/affs/super.c | 3 +-- fs/afs/super.c | 4 ++-- fs/hpfs/super.c | 3 +-- fs/namespace.c | 21 ++++++++++++++++++--- fs/reiserfs/super.c | 3 +-- 5 files changed, 23 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ce695e707f..63f5183f263 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -507,8 +507,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } - kfree(sb->s_options); - sb->s_options = new_opts; + replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; sbi->s_mode = mode; diff --git a/fs/afs/super.c b/fs/afs/super.c index 2753f16dd31..76828e5f8a3 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -408,17 +408,17 @@ static int afs_get_sb(struct file_system_type *fs_type, deactivate_locked_super(sb); goto error; } - sb->s_options = new_opts; + save_mount_options(sb, new_opts); sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); - kfree(new_opts); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); } simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.cell); + kfree(new_opts); _leave(" = 0 [%p]", sb); return 0; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fecf402d7b8..fc77965be84 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -423,8 +423,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s); - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; diff --git a/fs/namespace.c b/fs/namespace.c index 0d2003fb437..134d494158d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,12 +695,16 @@ static inline void mangle(struct seq_file *m, const char *s) */ int generic_show_options(struct seq_file *m, struct vfsmount *mnt) { - const char *options = mnt->mnt_sb->s_options; + const char *options; + + rcu_read_lock(); + options = rcu_dereference(mnt->mnt_sb->s_options); if (options != NULL && options[0]) { seq_putc(m, ','); mangle(m, options); } + rcu_read_unlock(); return 0; } @@ -721,11 +725,22 @@ EXPORT_SYMBOL(generic_show_options); */ void save_mount_options(struct super_block *sb, char *options) { - kfree(sb->s_options); - sb->s_options = kstrdup(options, GFP_KERNEL); + BUG_ON(sb->s_options); + rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); } EXPORT_SYMBOL(save_mount_options); +void replace_mount_options(struct super_block *sb, char *options) +{ + char *old = sb->s_options; + rcu_assign_pointer(sb->s_options, options); + if (old) { + synchronize_rcu(); + kfree(old); + } +} +EXPORT_SYMBOL(replace_mount_options); + #ifdef CONFIG_PROC_FS /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d444fe0013a..1215a4f50cd 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1316,8 +1316,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok: - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; out_err: -- cgit v1.2.3 From 843382370ec614768ac13582405f93635cf3637c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2009 21:52:06 +0900 Subject: nilfs2: ensure to clear dirty state when deleting metadata file block This would fix the following failure during GC: nilfs_cpfile_delete_checkpoints: cannot delete block NILFS: GC failed during preparation: cannot delete checkpoints: err=-2 The problem was caused by a break in state consistency between page cache and btree; the above block was removed from the btree but the page buffering the block was remaining in the page cache in dirty state. This resolves the inconsistency by ensuring to clear dirty state of the page buffering the deleted block. Reported-by: David Arendt Signed-off-by: Ryusuke Konishi --- fs/nilfs2/mdt.c | 2 +- fs/nilfs2/page.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index e1c6777931b..bb78745a0e3 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -300,7 +300,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) int err; err = nilfs_bmap_delete(ii->i_bmap, block); - if (likely(!err)) { + if (!err || err == -ENOENT) { nilfs_mdt_mark_dirty(inode); nilfs_mdt_forget_block(inode, block); } diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 1bfbba9c0e9..a2692bbc7b5 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -128,7 +128,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); clear_buffer_nilfs_volatile(bh); - if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) + clear_buffer_dirty(bh); + if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); clear_buffer_uptodate(bh); -- cgit v1.2.3 From 47eb6b9c8fa963c9f49967ad1d9d7ec947d15b68 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 30 Apr 2009 02:21:00 +0900 Subject: nilfs2: fix possible circular locking for get information ioctls This is one of two patches which are to correct possible circular locking between mm->mmap_sem and nilfs->ns_segctor_sem. The problem was detected by lockdep check as follows: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00002-g3552613 #6 ------------------------------------------------------- mmap/5418 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_to_user+0x2c/0xfc [] nilfs_ioctl_wrap_copy+0x103/0x160 [nilfs2] [] nilfs_ioctl+0x30a/0x3b0 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff other info that might help us debug this: 1 lock held by mmap/5418: #0: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a stack backtrace: Pid: 5418, comm: mmap Not tainted 2.6.30-rc3-nilfs-00002-g3552613 #6 Call Trace: [] ? printk+0xf/0x12 [] print_circular_bug_tail+0xaa/0xb5 [] __lock_acquire+0xdcc/0x13b0 [] ? nilfs_sufile_get_stat+0x1e/0x105 [nilfs2] [] ? up_read+0x16/0x2c [] ? nilfs_sufile_get_stat+0xfa/0x105 [nilfs2] [] lock_acquire+0xba/0xdd [] ? nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] down_read+0x2a/0x3e [] ? nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] ? do_page_fault+0x1d8/0x30a [] ? down_read_trylock+0x39/0x43 [] do_page_fault+0x2fb/0x30a [] ? do_page_fault+0x0/0x30a [] error_code+0x72/0x78 [] ? do_page_fault+0x0/0x30a This makes the lock granularity of nilfs->ns_segctor_sem finer than that of the mmap semaphore for ioctl commands except nilfs_clean_segments(). The successive patch ("nilfs2: fix lock order reversal in nilfs_clean_segments ioctl") is required to fully resolve the problem. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 100 +++++++++++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index be387c6b2d4..e3c693d37d6 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -147,29 +147,12 @@ static ssize_t nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, - nmembs); -} - -static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_cpinfo); + ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, + nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -195,28 +178,11 @@ static ssize_t nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); -} - -static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_suinfo); + ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -242,28 +208,11 @@ static ssize_t nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, void *buf, size_t size, size_t nmembs) { - return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); -} - -static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, - unsigned int cmd, void __user *argp) -{ - struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - struct nilfs_argv argv; int ret; - if (copy_from_user(&argv, argp, sizeof(argv))) - return -EFAULT; - down_read(&nilfs->ns_segctor_sem); - ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), - nilfs_ioctl_do_get_vinfo); + ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); up_read(&nilfs->ns_segctor_sem); - if (ret < 0) - return ret; - - if (copy_to_user(argp, &argv, sizeof(argv))) - ret = -EFAULT; return ret; } @@ -276,17 +225,21 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, struct nilfs_bdesc *bdescs = buf; int ret, i; + down_read(&nilfs->ns_segctor_sem); for (i = 0; i < nmembs; i++) { ret = nilfs_bmap_lookup_at_level(bmap, bdescs[i].bd_offset, bdescs[i].bd_level + 1, &bdescs[i].bd_blocknr); if (ret < 0) { - if (ret != -ENOENT) + if (ret != -ENOENT) { + up_read(&nilfs->ns_segctor_sem); return ret; + } bdescs[i].bd_blocknr = 0; } } + up_read(&nilfs->ns_segctor_sem); return nmembs; } @@ -300,10 +253,8 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; - down_read(&nilfs->ns_segctor_sem); ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); - up_read(&nilfs->ns_segctor_sem); if (ret < 0) return ret; @@ -623,6 +574,29 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return 0; } +static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, + unsigned int cmd, void __user *argp, + ssize_t (*dofunc)(struct the_nilfs *, + __u64 *, int, + void *, size_t, size_t)) + +{ + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + struct nilfs_argv argv; + int ret; + + if (copy_from_user(&argv, argp, sizeof(argv))) + return -EFAULT; + + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); + if (ret < 0) + return ret; + + if (copy_to_user(argp, &argv, sizeof(argv))) + ret = -EFAULT; + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; @@ -634,16 +608,18 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_DELETE_CHECKPOINT: return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); case NILFS_IOCTL_GET_CPINFO: - return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_cpinfo); case NILFS_IOCTL_GET_CPSTAT: return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUINFO: - return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_suinfo); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: - /* XXX: rename to ??? */ - return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); + return nilfs_ioctl_get_info(inode, filp, cmd, argp, + nilfs_ioctl_do_get_vinfo); case NILFS_IOCTL_GET_BDESCS: return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); case NILFS_IOCTL_CLEAN_SEGMENTS: -- cgit v1.2.3 From 4f6b828837b4e3836f2c9ac2f0eab9773b6c1327 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 10 May 2009 22:41:43 +0900 Subject: nilfs2: fix lock order reversal in nilfs_clean_segments ioctl This is a companion patch to ("nilfs2: fix possible circular locking for get information ioctls"). This corrects lock order reversal between mm->mmap_sem and nilfs->ns_segctor_sem in nilfs_clean_segments() which was detected by lockdep check: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.30-rc3-nilfs-00003-g360bdc1 #7 ------------------------------------------------------- mmap/5294 is trying to acquire lock: (&nilfs->ns_segctor_sem){++++.+}, at: [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] do_page_fault+0x1d8/0x30a which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] __lock_acquire+0x1066/0x13b0 [] lock_acquire+0xba/0xdd [] might_fault+0x68/0x88 [] copy_from_user+0x2a/0x111 [] nilfs_ioctl_prepare_clean_segments+0x1d/0xf1 [nilfs2] [] nilfs_clean_segments+0x6d/0x1b9 [nilfs2] [] nilfs_ioctl+0x2ad/0x318 [nilfs2] [] vfs_ioctl+0x22/0x69 [] do_vfs_ioctl+0x460/0x499 [] sys_ioctl+0x40/0x5a [] sysenter_do_call+0x12/0x38 [] 0xffffffff -> #0 (&nilfs->ns_segctor_sem){++++.+}: [] __lock_acquire+0xdcc/0x13b0 [] lock_acquire+0xba/0xdd [] down_read+0x2a/0x3e [] nilfs_transaction_begin+0xb6/0x10c [nilfs2] [] nilfs_page_mkwrite+0xe7/0x154 [nilfs2] [] __do_fault+0x165/0x376 [] handle_mm_fault+0x287/0x5d1 [] do_page_fault+0x2fb/0x30a [] error_code+0x72/0x78 [] 0xffffffff where nilfs_clean_segments() holds: nilfs->ns_segctor_sem -> copy_from_user() --> page fault -> mm->mmap_sem And, page fault path may hold: page fault -> mm->mmap_sem --> nilfs_page_mkwrite() -> nilfs->ns_segctor_sem Even though nilfs_clean_segments() does not perform write access on given user pages, it may cause deadlock because nilfs->ns_segctor_sem is shared per device and mm->mmap_sem can be shared with other tasks. To avoid this problem, this patch moves all calls of copy_from_user() outside the nilfs->ns_segctor_sem lock in the ioctl. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 163 ++++++++++++++++++++++++++++++---------------------- fs/nilfs2/nilfs.h | 3 +- fs/nilfs2/segment.c | 5 +- fs/nilfs2/segment.h | 3 +- 4 files changed, 100 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index e3c693d37d6..49489f68eab 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -25,6 +25,7 @@ #include /* lock_kernel(), unlock_kernel() */ #include /* capable() */ #include /* copy_from_user(), copy_to_user() */ +#include #include #include "nilfs.h" #include "segment.h" @@ -297,10 +298,10 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } -static ssize_t -nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *inode; struct nilfs_vdesc *vdesc; struct buffer_head *bh, *n; @@ -361,19 +362,10 @@ nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags, return ret; } -static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_move_blocks); -} - -static ssize_t -nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *cpfile = nilfs->ns_cpfile; struct nilfs_period *periods = buf; int ret, i; @@ -387,36 +379,21 @@ nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) +static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_delete_checkpoints); -} + size_t nmembs = argv->v_nmembs; + int ret; -static ssize_t -nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) -{ - int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); + ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_vblocknrs); -} - -static ssize_t -nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, - int flags, void *buf, size_t size, - size_t nmembs) +static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct inode *dat = nilfs_dat_inode(nilfs); struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; struct nilfs_bdesc *bdescs = buf; @@ -455,18 +432,10 @@ nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp, return nmembs; } -static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_mark_blocks_dirty); -} - -static ssize_t -nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, - void *buf, size_t size, size_t nmembs) +static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, + struct nilfs_argv *argv, void *buf) { + size_t nmembs = argv->v_nmembs; struct nilfs_sb_info *sbi = nilfs->ns_writer; int ret; @@ -481,31 +450,19 @@ nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, return (ret < 0) ? ret : nmembs; } -static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, - struct nilfs_argv *argv, - int dir) -{ - return nilfs_ioctl_wrap_copy(nilfs, argv, dir, - nilfs_ioctl_do_free_segments); -} - int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, - void __user *argp) + struct nilfs_argv *argv, void **kbufs) { - struct nilfs_argv argv[5]; const char *msg; - int dir, ret; - - if (copy_from_user(argv, argp, sizeof(argv))) - return -EFAULT; + int ret; - dir = _IOC_WRITE; - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir); + ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); if (ret < 0) { msg = "cannot read source blocks"; goto failed; } - ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir); + + ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); if (ret < 0) { /* * can safely abort because checkpoints can be removed @@ -514,7 +471,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete checkpoints"; goto failed; } - ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir); + ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], kbufs[2]); if (ret < 0) { /* * can safely abort because DAT file is updated atomically @@ -523,7 +480,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot delete virtual blocks from DAT file"; goto failed; } - ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir); + ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], kbufs[3]); if (ret < 0) { /* * can safely abort because the operation is nondestructive. @@ -531,7 +488,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, msg = "cannot mark copying blocks dirty"; goto failed; } - ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir); + ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]); if (ret < 0) { /* * can safely abort because this operation is atomic. @@ -551,9 +508,75 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp) { + struct nilfs_argv argv[5]; + const static size_t argsz[5] = { + sizeof(struct nilfs_vdesc), + sizeof(struct nilfs_period), + sizeof(__u64), + sizeof(struct nilfs_bdesc), + sizeof(__u64), + }; + void __user *base; + void *kbufs[5]; + struct the_nilfs *nilfs; + size_t len, nsegs; + int n, ret; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return nilfs_clean_segments(inode->i_sb, argp); + + if (copy_from_user(argv, argp, sizeof(argv))) + return -EFAULT; + + nsegs = argv[4].v_nmembs; + if (argv[4].v_size != argsz[4]) + return -EINVAL; + /* + * argv[4] points to segment numbers this ioctl cleans. We + * use kmalloc() for its buffer because memory used for the + * segment numbers is enough small. + */ + kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, + nsegs * sizeof(__u64)); + if (IS_ERR(kbufs[4])) + return PTR_ERR(kbufs[4]); + + nilfs = NILFS_SB(inode->i_sb)->s_nilfs; + + for (n = 0; n < 4; n++) { + ret = -EINVAL; + if (argv[n].v_size != argsz[n]) + goto out_free; + + if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) + goto out_free; + + len = argv[n].v_size * argv[n].v_nmembs; + base = (void __user *)(unsigned long)argv[n].v_base; + if (len == 0) { + kbufs[n] = NULL; + continue; + } + + kbufs[n] = vmalloc(len); + if (!kbufs[n]) { + ret = -ENOMEM; + goto out_free; + } + if (copy_from_user(kbufs[n], base, len)) { + ret = -EFAULT; + vfree(kbufs[n]); + goto out_free; + } + } + + ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + + out_free: + while (--n > 0) + vfree(kbufs[n]); + kfree(kbufs[4]); + return ret; } static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3d0c18a16db..da6fc0bba2e 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -236,7 +236,8 @@ extern int nilfs_sync_file(struct file *, struct dentry *, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); -int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); +int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, + void **); /* inode.c */ extern struct inode *nilfs_new_inode(struct inode *, int); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index fb70ec3be20..22c7f65c240 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2589,7 +2589,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) } } -int nilfs_clean_segments(struct super_block *sb, void __user *argp) +int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, + void **kbufs) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct nilfs_sc_info *sci = NILFS_SC(sbi); @@ -2606,7 +2607,7 @@ int nilfs_clean_segments(struct super_block *sb, void __user *argp) err = nilfs_init_gcdat_inode(nilfs); if (unlikely(err)) goto out_unlock; - err = nilfs_ioctl_prepare_clean_segments(nilfs, argp); + err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); if (unlikely(err)) goto out_unlock; diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a98fc1ed0bb..476bdd5df5b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -222,7 +222,8 @@ extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); extern void nilfs_flush_segment(struct super_block *, ino_t); -extern int nilfs_clean_segments(struct super_block *, void __user *); +extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, + void **); extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, __u64 *, size_t); -- cgit v1.2.3 From 83aca8f480fcd2d9748301a5d060cf947dc75b94 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Mon, 11 May 2009 23:24:47 +0900 Subject: nilfs2: check size of array structured data exchanged via ioctls Although some ioctls of nilfs2 exchange data in the form of indirectly referenced array, some of them lack size check on the array elements. This inserts the missing checks and rejects requests if data of ioctl does not have a valid format. We usually don't have to check size of structures that we associated with ioctl commands because the size is tested implicitly for identifying ioctl command; the checks this patch adds are for the cases where the implicit check is not applied. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 49489f68eab..50ff3f2cdf2 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -254,6 +254,9 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; + if (argv.v_size != sizeof(struct nilfs_bdesc)) + return -EINVAL; + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), nilfs_ioctl_do_get_bdescs); if (ret < 0) @@ -599,6 +602,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, unsigned int cmd, void __user *argp, + size_t membsz, ssize_t (*dofunc)(struct the_nilfs *, __u64 *, int, void *, size_t, size_t)) @@ -611,6 +615,9 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, if (copy_from_user(&argv, argp, sizeof(argv))) return -EFAULT; + if (argv.v_size != membsz) + return -EINVAL; + ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc); if (ret < 0) return ret; @@ -632,16 +639,19 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); case NILFS_IOCTL_GET_CPINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_cpinfo), nilfs_ioctl_do_get_cpinfo); case NILFS_IOCTL_GET_CPSTAT: return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_SUINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_suinfo), nilfs_ioctl_do_get_suinfo); case NILFS_IOCTL_GET_SUSTAT: return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); case NILFS_IOCTL_GET_VINFO: return nilfs_ioctl_get_info(inode, filp, cmd, argp, + sizeof(struct nilfs_vinfo), nilfs_ioctl_do_get_vinfo); case NILFS_IOCTL_GET_BDESCS: return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); -- cgit v1.2.3 From 2b79bc4f7ebbd5af3c8b867968f9f15602d5f802 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 11 May 2009 14:25:34 -0400 Subject: dup2: Fix return value with oldfd == newfd and invalid fd The return value of dup2 when oldfd == newfd and the fd isn't valid is not getting properly sign extended. We end up with 4294967287 instead of -EBADF. I've reproduced this on SLE11 (2.6.27.21), openSUSE Factory (2.6.29-rc5), and Ubuntu 9.04 (2.6.28). This patch uses a signed int for the error value so it is properly extended. Commit 6c5d0512a091480c9f981162227fdb1c9d70e555 introduced this regression. Reported-by: Jiri Dluhos Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/fcntl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index cc8e4de2fee..1ad703150de 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -117,11 +117,13 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if (unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; + int retval = oldfd; + rcu_read_lock(); if (!fcheck_files(files, oldfd)) - oldfd = -EBADF; + retval = -EBADF; rcu_read_unlock(); - return oldfd; + return retval; } return sys_dup3(oldfd, newfd, 0); } -- cgit v1.2.3 From 8daed1e549b55827758b3af7b8132a73fc51526f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 11 May 2009 16:10:19 -0400 Subject: nfsd: silence lockdep warning Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5275097a756..b5348405046 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -229,7 +229,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) goto out; status = vfs_readdir(filp, nfsd4_build_namelist, &names); fput(filp); - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&names)) { entry = list_entry(names.next, struct name_list, list); @@ -264,7 +264,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); + mutex_lock_nested(&rec_dir.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, rec_dir.dentry, namlen); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); -- cgit v1.2.3 From 9c1ee184a30394e54165fa4c15923cabd952c106 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 May 2009 18:36:58 -0400 Subject: ext4: Fix sub-block zeroing for writes into preallocated extents We need to mark the buffer_head mapping preallocated space as new during write_begin. Otherwise we don't zero out the page cache content properly for a partial write. This will cause file corruption with preallocation. Now that we mark the buffer_head new we also need to have a valid buffer_head blocknr so that unmap_underlying_metadata() unmaps the correct block. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 ++ fs/ext4/inode.c | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e4033215834..172656c2a3b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2875,6 +2875,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (allocated > max_blocks) allocated = max_blocks; set_buffer_unwritten(bh_result); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; goto out2; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e91f978c7f1..d4b634ae06b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2323,6 +2323,13 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, set_buffer_delay(bh_result); } else if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); + /* + * With sub-block writes into unwritten extents + * we also need to mark the buffer as new so that + * the unwritten parts of the buffer gets correctly zeroed. + */ + if (buffer_unwritten(bh_result)) + set_buffer_new(bh_result); ret = 0; } -- cgit v1.2.3 From 33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 12 May 2009 14:40:37 -0400 Subject: ext4: Use a fake block number for delayed new buffer_head Use a very large unsigned number (~0xffff) as as the fake block number for the delayed new buffer. The VFS should never try to write out this number, but if it does, this will make it obvious. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d4b634ae06b..0ac31a06422 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2297,6 +2297,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret = 0; + sector_t invalid_block = ~((sector_t) 0xffff); + + if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) + invalid_block = ~0; BUG_ON(create == 0); BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); @@ -2318,7 +2322,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* not enough space to reserve */ return ret; - map_bh(bh_result, inode->i_sb, 0); + map_bh(bh_result, inode->i_sb, invalid_block); set_buffer_new(bh_result); set_buffer_delay(bh_result); } else if (ret > 0) { -- cgit v1.2.3 From bfe3891a5f5d3b78146a45f40e435d14f5ae39dd Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 12 May 2009 13:19:44 -0700 Subject: epoll: fix size check in epoll_create() Fix a size check WRT the manual pages. This was inadvertently broken by commit 9fe5ad9c8cef9ad5873d8ee55d1cf00d9b607df0 ("flag parameters add-on: remove epoll_create size param"). Signed-off-by: Davide Libenzi Cc: Cc: rohit verma Cc: Ulrich Drepper Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a89f370fadb..5458e80fc55 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1212,7 +1212,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) SYSCALL_DEFINE1(epoll_create, int, size) { - if (size < 0) + if (size <= 0) return -EINVAL; return sys_epoll_create1(0); -- cgit v1.2.3 From a37b06d589f2c687a38d07569f4ef97c650fde39 Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Wed, 13 May 2009 02:56:39 +0100 Subject: Squashfs: fix breakage when page size > metadata block size Squashfs is broken on any system where the page size is larger than the metadata size (8192). This is easily fixed by ensuring cache->pages is always > 0. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Doug Chapman Signed-off-by: Andrew Morton Signed-off-by: Phillip Lougher --- fs/squashfs/cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1c4739e33af..40c98fa6b5d 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -252,6 +252,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, cache->entries = entries; cache->block_size = block_size; cache->pages = block_size >> PAGE_CACHE_SHIFT; + cache->pages = cache->pages ? cache->pages : 1; cache->name = name; cache->num_waiters = 0; spin_lock_init(&cache->lock); -- cgit v1.2.3 From fffb47b80e8bb3f171ef02b90b1ae22c63983979 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 May 2009 02:59:26 +0100 Subject: Squashfs: check page size is not larger than the filesystem block size Normally the block size (by default 128K) will be larger than the page size, unless a non-standard block size has been specified in Mksquashfs, and the page size is larger than 4K. Signed-off-by: Phillip Lougher --- fs/squashfs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index ffa6edcd2d0..0adc624c956 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -157,6 +157,16 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE) goto failed_mount; + /* + * Check the system page size is not larger than the filesystem + * block size (by default 128K). This is currently not supported. + */ + if (PAGE_CACHE_SIZE > msblk->block_size) { + ERROR("Page size > filesystem block size (%d). This is " + "currently not supported!\n", msblk->block_size); + goto failed_mount; + } + msblk->block_log = le16_to_cpu(sblk->block_log); if (msblk->block_log > SQUASHFS_FILE_MAX_LOG) goto failed_mount; -- cgit v1.2.3 From e5d287539dba264a1d9d7607a25b8c8c61c9d658 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 May 2009 03:25:20 +0100 Subject: Squashfs: cody tidying, remove commented out line in Makefile Signed-off-by: Phillip Lougher --- fs/squashfs/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 8258cf9a031..70e3244fa30 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,4 +5,3 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o -#squashfs-y += squashfs2_0.o -- cgit v1.2.3 From f2deae9d4e70793568ef9e85d227abb7bef5b622 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 13 May 2009 15:56:10 +0100 Subject: Remove implementation of readpage from the hugetlbfs_aops The core VM assumes the page size used by the address_space in inode->i_mapping is PAGE_SIZE but hugetlbfs breaks this assumption by inserting pages into the page cache at offsets the core VM considers unexpected. This would not be a problem except that hugetlbfs also provide a ->readpage implementation. As it exists, the core VM can assume the base page size is being used, allocate pages on behalf of the filesystem, insert them into the page cache and call ->readpage to populate them. These pages are the wrong size and at the wrong offset for hugetlbfs causing confusion. This patch deletes the ->readpage implementation for hugetlbfs on the grounds the core VM should not be allocating and populating pages on behalf of hugetlbfs. There should be no existing users of the ->readpage implementation so it should not cause a regression. Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 153d9681192..c1462d43e72 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -312,16 +312,6 @@ out: return retval; } -/* - * Read a page. Again trivial. If it didn't already exist - * in the page cache, it is zero-filled. - */ -static int hugetlbfs_readpage(struct file *file, struct page * page) -{ - unlock_page(page); - return -EINVAL; -} - static int hugetlbfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -701,7 +691,6 @@ static void hugetlbfs_destroy_inode(struct inode *inode) } static const struct address_space_operations hugetlbfs_aops = { - .readpage = hugetlbfs_readpage, .write_begin = hugetlbfs_write_begin, .write_end = hugetlbfs_write_end, .set_page_dirty = hugetlbfs_set_page_dirty, -- cgit v1.2.3 From d8e2f53ac99f4ce7d63807a84f98d1b80df598cf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 14 May 2009 07:46:59 -0400 Subject: cifs: fix error handling in parse_DFS_referrals cifs_strndup_from_ucs returns NULL on error, not an ERR_PTR Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 75e6623a863..5759ba53dc9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3976,9 +3976,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, max_len = data_end - temp; node->path_name = cifs_strndup_from_ucs(temp, max_len, is_unicode, nls_codepage); - if (IS_ERR(node->path_name)) { - rc = PTR_ERR(node->path_name); - node->path_name = NULL; + if (!node->path_name) { + rc = -ENOMEM; goto parse_DFS_referrals_exit; } @@ -3987,11 +3986,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, max_len = data_end - temp; node->node_name = cifs_strndup_from_ucs(temp, max_len, is_unicode, nls_codepage); - if (IS_ERR(node->node_name)) { - rc = PTR_ERR(node->node_name); - node->node_name = NULL; - goto parse_DFS_referrals_exit; - } + if (!node->node_name) + rc = -ENOMEM; } parse_DFS_referrals_exit: -- cgit v1.2.3 From 2757495c906113896b378bf084708846273c87b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 14 May 2009 13:10:02 -0400 Subject: Btrfs: init inode ordered_data_close flag properly This flag is used to decide when we need to send a given file through the ordered code to make sure it is fully written before a transaction commits. It was not being properly set to zero when the inode was being setup. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90c23eb2882..22450bd972d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3122,6 +3122,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->flags = 0; bi->index_cnt = (u64)-1; bi->last_unlink_trans = 0; + bi->ordered_data_close = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); -- cgit v1.2.3 From 76a05b35a320e8c968d0fec8f512a1acae227309 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 14 May 2009 13:24:30 -0400 Subject: Btrfs: Don't loop forever on metadata IO failures When a btrfs metadata read fails, the first thing we try to do is find a good copy on another mirror of the block. If this fails, read_tree_block() ends up returning a buffer that isn't up to date. The btrfs btree reading code was reworked to drop locks and repeat the search when IO was done, but the changes didn't add a check for failed reads. The end result was looping forever on buffers that were never going to become up to date. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a99f1c2a710..fedf8b9f03a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1469,6 +1469,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, u32 blocksize; struct extent_buffer *b = *eb_ret; struct extent_buffer *tmp; + int ret; blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); @@ -1476,6 +1477,10 @@ read_block_for_search(struct btrfs_trans_handle *trans, tmp = btrfs_find_tree_block(root, blocknr, blocksize); if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + /* + * we found an up to date block without sleeping, return + * right away + */ *eb_ret = tmp; return 0; } @@ -1483,7 +1488,9 @@ read_block_for_search(struct btrfs_trans_handle *trans, /* * reduce lock contention at high levels * of the btree by dropping locks before - * we read. + * we read. Don't release the lock on the current + * level because we need to walk this node to figure + * out which blocks to read. */ btrfs_unlock_up_safe(p, level + 1); btrfs_set_path_blocking(p); @@ -1494,10 +1501,21 @@ read_block_for_search(struct btrfs_trans_handle *trans, reada_for_search(root, p, level, slot, key->objectid); btrfs_release_path(NULL, p); + + ret = -EAGAIN; tmp = read_tree_block(root, blocknr, blocksize, gen); - if (tmp) + if (tmp) { + /* + * If the read above didn't mark this buffer up to date, + * it will never end up being up to date. Set ret to EIO now + * and give up so that our caller doesn't loop forever + * on our EAGAINs. + */ + if (!btrfs_buffer_uptodate(tmp, 0)) + ret = -EIO; free_extent_buffer(tmp); - return -EAGAIN; + } + return ret; } /* @@ -1696,6 +1714,9 @@ cow_done: if (ret == -EAGAIN) goto again; + if (ret == -EIO) + goto done; + if (!p->skip_locking) { int lret; @@ -1738,6 +1759,8 @@ done: */ if (!p->leave_spinning) btrfs_set_path_blocking(p); + if (ret < 0) + btrfs_release_path(root, p); return ret; } @@ -4212,6 +4235,11 @@ again: if (ret == -EAGAIN) goto again; + if (ret < 0) { + btrfs_release_path(root, path); + goto done; + } + if (!path->skip_locking) { ret = btrfs_try_spin_lock(next); if (!ret) { @@ -4246,6 +4274,11 @@ again: if (ret == -EAGAIN) goto again; + if (ret < 0) { + btrfs_release_path(root, path); + goto done; + } + if (!path->skip_locking) { btrfs_assert_tree_locked(path->nodes[level]); ret = btrfs_try_spin_lock(next); -- cgit v1.2.3 From cc7b0c9b701a079016183f3546b4d720194b367f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 14 May 2009 13:31:21 -0400 Subject: Btrfs: remove some WARN_ONs in the IO failure path These debugging WARN_ONs make too much console noise during regular IO failures. An IO failure will still generate a number of messages as we verify checksums etc, but these two are not needed. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/inode.c | 1 - 2 files changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0ff16d3331d..4b0ea0b80c2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -848,8 +848,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (ret == 0) set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); - else - WARN_ON(1); return buf; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 22450bd972d..1c8b0190d03 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4296,7 +4296,6 @@ out: } if (err) { free_extent_map(em); - WARN_ON(1); return ERR_PTR(err); } return em; -- cgit v1.2.3 From 5d847a8ed970d17e2734ff9e07a74fe36cceb24e Mon Sep 17 00:00:00 2001 From: Li Hong Date: Thu, 14 May 2009 13:52:21 -0400 Subject: Btrfs: remove outdated comment in btrfs_ioctl_resize() In Li Zefan's commit dae7b665cf6d6e6e733f1c9c16cf55547dd37e33, a combination call of kmalloc() and copy_from_user() is replaced by memdup_user(). So btrfs_ioctl_resize() doesn't use GFP_NOFS any more. Signed-off-by: Li Hong Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 48762aa1e94..db84aa48ab7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -437,10 +437,6 @@ out_unlock: return 0; } -/* - * Called inside transaction, so use GFP_NOFS - */ - static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) { u64 new_size; -- cgit v1.2.3 From 6b65c5c61bf86086817a5ed786c8f45755ac83b3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 14 May 2009 13:52:21 -0400 Subject: Btrfs: make show_options result match actual option names The notreelog and flushoncommit mount options were being printed slightly differently. Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bf0e84c7560..e99510bfbff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -436,9 +436,9 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) if (btrfs_test_opt(root, SSD)) seq_puts(seq, ",ssd"); if (btrfs_test_opt(root, NOTREELOG)) - seq_puts(seq, ",no-treelog"); + seq_puts(seq, ",notreelog"); if (btrfs_test_opt(root, FLUSHONCOMMIT)) - seq_puts(seq, ",flush-on-commit"); + seq_puts(seq, ",flushoncommit"); if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) seq_puts(seq, ",noacl"); return 0; -- cgit v1.2.3 From 9f55684c2d9869e8cc53595a3fee679958511cfb Mon Sep 17 00:00:00 2001 From: Sankar P Date: Thu, 14 May 2009 13:52:22 -0400 Subject: Btrfs: Spelling fix in btrfs_lookup_first_block_group comments Signed-off-by: Sankar P Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e4966444811..3e2c7c738f2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -312,7 +312,7 @@ btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) } /* - * return the block group that contains teh given bytenr + * return the block group that contains the given bytenr */ struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, -- cgit v1.2.3 From 2a8964d63d50dd2d65d71d342bc7fb6ef4117614 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 14 May 2009 17:05:39 -0400 Subject: ext4: Clear the unwritten buffer_head flag after the extent is initialized The BH_Unwritten flag indicates that the buffer is allocated on disk but has not been written; that is, the disk was part of a persistent preallocation area. That flag should only be set when a get_blocks() function is looking up a inode's logical to physical block mapping. When ext4_get_blocks_wrap() is called with create=1, the uninitialized extent is converted into an initialized one, so the BH_Unwritten flag is no longer appropriate. Hence, we need to make sure the BH_Unwritten is not left set, since the combination of BH_Mapped and BH_Unwritten is not allowed; among other things, it will result ext4's get_block() to be called over and over again during the write_begin phase of write(2). Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0ac31a06422..2a9ffd528dd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1149,6 +1149,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, int retval; clear_buffer_mapped(bh); + clear_buffer_unwritten(bh); /* * Try to see if we can get the block without requesting @@ -1178,6 +1179,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, if (retval > 0 && buffer_mapped(bh)) return retval; + /* + * When we call get_blocks without the create flag, the + * BH_Unwritten flag could have gotten set if the blocks + * requested were part of a uninitialized extent. We need to + * clear this flag now that we are committed to convert all or + * part of the uninitialized extent to be an initialized + * extent. This is because we need to avoid the combination + * of BH_Unwritten and BH_Mapped flags being simultaneously + * set on the buffer_head. + */ + clear_buffer_unwritten(bh); + /* * New blocks allocate and/or writing to uninitialized extent * will possibly result in updating i_data, so we take -- cgit v1.2.3 From 2ec0ae3acec47f628179ee95fe2c4da01b5e9fc4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 May 2009 09:07:28 -0400 Subject: ext4: Fix race in ext4_inode_info.i_cached_extent If two CPU's simultaneously call ext4_ext_get_blocks() at the same time, there is nothing protecting the i_cached_extent structure from being used and updated at the same time. This could potentially cause the wrong location on disk to be read or written to, including potentially causing the corruption of the block group descriptors and/or inode table. This bug has been in the ext4 code since almost the very beginning of ext4's development. Fortunately once the data is stored in the page cache cache, ext4_get_blocks() doesn't need to be called, so trying to replicate this problem to the point where we could identify its root cause was *extremely* difficult. Many thanks to Kevin Shanahan for working over several months to be able to reproduce this easily so we could finally nail down the cause of the corruption. Signed-off-by: "Theodore Ts'o" Reviewed-by: "Aneesh Kumar K.V" --- fs/ext4/extents.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 172656c2a3b..e3a55eb8b26 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1841,11 +1841,13 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, { struct ext4_ext_cache *cex; BUG_ON(len == 0); + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; cex->ec_type = type; cex->ec_block = block; cex->ec_len = len; cex->ec_start = start; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } /* @@ -1902,12 +1904,17 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, struct ext4_extent *ex) { struct ext4_ext_cache *cex; + int ret = EXT4_EXT_CACHE_NO; + /* + * We borrow i_block_reservation_lock to protect i_cached_extent + */ + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; /* has cache valid data? */ if (cex->ec_type == EXT4_EXT_CACHE_NO) - return EXT4_EXT_CACHE_NO; + goto errout; BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && cex->ec_type != EXT4_EXT_CACHE_EXTENT); @@ -1918,11 +1925,11 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); - return cex->ec_type; + ret = cex->ec_type; } - - /* not in cache */ - return EXT4_EXT_CACHE_NO; +errout: + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return ret; } /* -- cgit v1.2.3 From 1f71ebedb3f8ce9108978168759c8551d873a912 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 14 May 2009 19:38:24 -0700 Subject: devpts: correctly set default options devpts_get_sb() calls memset(0) to clear mount options and calls parse_mount_options() if user specified any mount options. The memset(0) is bogus since the 'mode' and 'ptmxmode' options are non-zero by default. parse_mount_options() restores options to default anyway and can properly deal with NULL mount options. So in devpts_get_sb() remove memset(0) and call parse_mount_options() even for NULL mount options. Bug reported by Eric Paris: http://lkml.org/lkml/2009/5/7/448. Signed-off-by: Sukadev Bhattiprolu Tested-by: Marc Dionne Reported-by: Eric Paris Cc: Christoph Hellwig Cc: Alan Cox Acked-by: Serge Hallyn Cc: Al Viro Cc: "Rafael J. Wysocki" Reviewed-by: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/devpts/inode.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 21165cf934f..c68edb96944 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -90,6 +90,15 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) #define PARSE_MOUNT 0 #define PARSE_REMOUNT 1 +/* + * parse_mount_options(): + * Set @opts to mount options specified in @data. If an option is not + * specified in @data, set it to its default value. The exception is + * 'newinstance' option which can only be set/cleared on a mount (i.e. + * cannot be changed during remount). + * + * Note: @data may be NULL (in which case all options are set to default). + */ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) { char *p; @@ -355,12 +364,9 @@ static int devpts_get_sb(struct file_system_type *fs_type, struct pts_mount_opts opts; struct super_block *s; - memset(&opts, 0, sizeof(opts)); - if (data) { - error = parse_mount_options(data, PARSE_MOUNT, &opts); - if (error) - return error; - } + error = parse_mount_options(data, PARSE_MOUNT, &opts); + if (error) + return error; if (opts.newinstance) s = sget(fs_type, NULL, set_anon_super, NULL); -- cgit v1.2.3 From 12abb35a03e32c97235fcefdcf2d851be9f82dc2 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:01 -0400 Subject: reiserfs: clean up ifdefs With xattr cleanup even with xattrs disabled, much of the initial setup is still performed. Some #ifdefs are just not needed since the options they protect wouldn't be available anyway. This cleans those up. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2237e10c7c7..cf949646dd5 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -685,20 +685,6 @@ out: return err; } -/* Actual operations that are exported to VFS-land */ -struct xattr_handler *reiserfs_xattr_handlers[] = { - &reiserfs_xattr_user_handler, - &reiserfs_xattr_trusted_handler, -#ifdef CONFIG_REISERFS_FS_SECURITY - &reiserfs_xattr_security_handler, -#endif -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - &reiserfs_posix_acl_access_handler, - &reiserfs_posix_acl_default_handler, -#endif - NULL -}; - /* * In order to implement different sets of xattr operations for each xattr * prefix with the generic xattr API, a filesystem should create a @@ -922,6 +908,28 @@ static int create_privroot(struct dentry *dentry) return 0; } +#else +int __init reiserfs_xattr_register_handlers(void) { return 0; } +void reiserfs_xattr_unregister_handlers(void) {} +static int create_privroot(struct dentry *dentry) { return 0; } +#endif + +/* Actual operations that are exported to VFS-land */ +struct xattr_handler *reiserfs_xattr_handlers[] = { +#ifdef CONFIG_REISERFS_FS_XATTR + &reiserfs_xattr_user_handler, + &reiserfs_xattr_trusted_handler, +#endif +#ifdef CONFIG_REISERFS_FS_SECURITY + &reiserfs_xattr_security_handler, +#endif +#ifdef CONFIG_REISERFS_FS_POSIX_ACL + &reiserfs_posix_acl_access_handler, + &reiserfs_posix_acl_default_handler, +#endif + NULL +}; + static int xattr_mount_check(struct super_block *s) { /* We need generation numbers to ensure that the oid mapping is correct @@ -941,11 +949,6 @@ static int xattr_mount_check(struct super_block *s) return 0; } -#else -int __init reiserfs_xattr_register_handlers(void) { return 0; } -void reiserfs_xattr_unregister_handlers(void) {} -#endif - /* This will catch lookups from the fs root to .reiserfs_priv */ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) @@ -992,7 +995,6 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) int err = 0; struct dentry *privroot = REISERFS_SB(s)->priv_root; -#ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; @@ -1023,14 +1025,11 @@ error: clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); } -#endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ -#ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; else -#endif s->s_flags &= ~MS_POSIXACL; return err; -- cgit v1.2.3 From ceb5edc457f07956c82dccfb54ca8ae7e2a399f0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:02 -0400 Subject: reiserfs: deal with NULL xattr root w/ xattrs disabled This avoids an Oops in open_xa_root that can occur when deleting a file with xattrs disabled. It assumes that the xattr root will be there, and that is not guaranteed. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/super.c | 2 -- fs/reiserfs/xattr.c | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1215a4f50cd..3567fb9e3fb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -448,13 +448,11 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { -#ifdef CONFIG_REISERFS_FS_XATTR if (REISERFS_SB(s)->xattr_root) { d_invalidate(REISERFS_SB(s)->xattr_root); dput(REISERFS_SB(s)->xattr_root); REISERFS_SB(s)->xattr_root = NULL; } -#endif if (REISERFS_SB(s)->priv_root) { d_invalidate(REISERFS_SB(s)->priv_root); dput(REISERFS_SB(s)->priv_root); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index cf949646dd5..628075ca82c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -123,7 +123,9 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags) mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); xaroot = dget(REISERFS_SB(sb)->xattr_root); - if (!xaroot->d_inode) { + if (!xaroot) + xaroot = ERR_PTR(-ENODATA); + else if (!xaroot->d_inode) { int err = -ENODATA; if (xattr_may_create(flags)) err = xattr_mkdir(privroot->d_inode, xaroot, 0700); -- cgit v1.2.3 From b83674c0da6558e357c6b482ccf299eeea77d8ef Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:03 -0400 Subject: reiserfs: fixup perms when xattrs are disabled This adds CONFIG_REISERFS_FS_XATTR protection from reiserfs_permission. This is needed to avoid warnings during file deletions and chowns with xattrs disabled. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 628075ca82c..8e7deb0e696 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -871,23 +871,6 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return generic_permission(inode, mask, NULL); - else - return generic_permission(inode, mask, reiserfs_check_acl); -} - static int create_privroot(struct dentry *dentry) { int err; @@ -951,6 +934,25 @@ static int xattr_mount_check(struct super_block *s) return 0; } +int reiserfs_permission(struct inode *inode, int mask) +{ + /* + * We don't do permission checks on the internal objects. + * Permissions are determined by the "owning" object. + */ + if (IS_PRIVATE(inode)) + return 0; + +#ifdef CONFIG_REISERFS_FS_XATTR + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) != STAT_DATA_V1) + return generic_permission(inode, mask, reiserfs_check_acl); +#endif + return generic_permission(inode, mask, NULL); +} + /* This will catch lookups from the fs root to .reiserfs_priv */ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) -- cgit v1.2.3 From 7ee2cb7f32b299c2b06a31fde155457203e4b7dd Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Mon, 18 May 2009 17:41:40 -0400 Subject: nfs: Fix NFS v4 client handling of MAY_EXEC in nfs_permission. The problem is that permission checking is skipped if atomic open is possible, but when exec opens a file, it just opens it O_READONLY which means EXEC permission will not be checked at that time. This problem is observed by the following sequence (executed as root): mount -t nfs4 server:/ /mnt4 echo "ls" >/mnt4/foo chmod 744 /mnt4/foo su guest -c "mnt4/foo" Signed-off-by: Frank Filz Signed-off-by: Trond Myklebust Cc: stable@kernel.org Tested-by: Eugene Teo Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 370b190a09d..89f98e9a024 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1943,7 +1943,8 @@ int nfs_permission(struct inode *inode, int mask) case S_IFREG: /* NFSv4 has atomic_open... */ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && (mask & MAY_OPEN)) + && (mask & MAY_OPEN) + && !(mask & MAY_EXEC)) goto out; break; case S_IFDIR: -- cgit v1.2.3 From 8b6427a2a8f7dd43e9208fb33a3b116d66db4979 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 19 May 2009 09:57:03 -0400 Subject: cifs: fix pointer initialization and checks in cifs_follow_symlink (try #4) This is the third respin of the patch posted yesterday to fix the error handling in cifs_follow_symlink. It also includes a fix for a bogus NULL pointer check in CIFSSMBQueryUnixSymLink that Jeff Moyer spotted. It's possible for CIFSSMBQueryUnixSymLink to return without setting target_path to a valid pointer. If that happens then the current value to which we're initializing this pointer could cause an oops when it's kfree'd. This patch is a little more comprehensive than the last patches. It reorganizes cifs_follow_link a bit for (hopefully) better readability. It should also eliminate the uneeded allocation of full_path on servers without unix extensions (assuming they can get to this point anyway, of which I'm not convinced). On a side note, I'm not sure I agree with the logic of enabling this query even when unix extensions are disabled on the client. It seems like that should disable this as well. But, changing that is outside the scope of this fix, so I've left it alone for now. Reported-by: Jeff Moyer Signed-off-by: Jeff Layton Reviewed-by: Jeff Moyer Reviewed-by: Christoph Hellwig Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- fs/cifs/link.c | 52 ++++++++++++++++++++++++++-------------------------- 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5759ba53dc9..d06260251c3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2475,7 +2475,7 @@ querySymLinkRetry: /* BB FIXME investigate remapping reserved chars here */ *symlinkinfo = cifs_strndup_from_ucs(data_start, count, is_unicode, nls_codepage); - if (!symlinkinfo) + if (!*symlinkinfo) rc = -ENOMEM; } } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index ea9d11e3dcb..cd83c53fcbb 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -107,48 +107,48 @@ void * cifs_follow_link(struct dentry *direntry, struct nameidata *nd) { struct inode *inode = direntry->d_inode; - int rc = -EACCES; + int rc = -ENOMEM; int xid; char *full_path = NULL; - char *target_path = ERR_PTR(-ENOMEM); - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; + char *target_path = NULL; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *tcon = cifs_sb->tcon; xid = GetXid(); - full_path = build_path_from_dentry(direntry); + /* + * For now, we just handle symlinks with unix extensions enabled. + * Eventually we should handle NTFS reparse points, and MacOS + * symlink support. For instance... + * + * rc = CIFSSMBQueryReparseLinkInfo(...) + * + * For now, just return -EACCES when the server doesn't support posix + * extensions. Note that we still allow querying symlinks when posix + * extensions are manually disabled. We could disable these as well + * but there doesn't seem to be any harm in allowing the client to + * read them. + */ + if (!(tcon->ses->capabilities & CAP_UNIX)) { + rc = -EACCES; + goto out; + } + full_path = build_path_from_dentry(direntry); if (!full_path) goto out; cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode)); - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - - /* We could change this to: - if (pTcon->unix_ext) - but there does not seem any point in refusing to - get symlink info if we can, even if unix extensions - turned off for this mount */ - - if (pTcon->ses->capabilities & CAP_UNIX) - rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, - &target_path, - cifs_sb->local_nls); - else { - /* BB add read reparse point symlink code here */ - /* rc = CIFSSMBQueryReparseLinkInfo */ - /* BB Add code to Query ReparsePoint info */ - /* BB Add MAC style xsymlink check here if enabled */ - } + rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, + cifs_sb->local_nls); + kfree(full_path); +out: if (rc != 0) { kfree(target_path); target_path = ERR_PTR(rc); } - kfree(full_path); -out: FreeXid(xid); nd_set_link(nd, target_path); return NULL; -- cgit v1.2.3 From 703a3b8e5c01cf6fb33c6d8dc99905f889a4e992 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 21 May 2009 22:21:53 +0000 Subject: [CIFS] fix posix open regression Posix open code was not properly adding the file to the list of open files. Fix allocating cifsFileInfo more than once, and adding twice to flist and tlist. Also fix mode setting to be done in one place in these paths. Signed-off-by: Steve French Reviewed-by: Shirish Pargaonkar Tested-by: Jeff Layton Tested-by: Luca Tettamanti --- fs/cifs/dir.c | 14 ++++++------- fs/cifs/file.c | 66 ++++++++++++++++++++++++++++++++-------------------------- 2 files changed, 44 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11431ed72a7..f49d684edd9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -225,6 +225,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, if (!(oflags & FMODE_READ)) write_only = true; + mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, pnetfid, presp_data, &oplock, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -310,7 +311,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, return -ENOMEM; } - mode &= ~current_umask(); if (oplockEnabled) oplock = REQ_OPLOCK; @@ -336,7 +336,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, else /* success, no need to query */ goto cifs_create_set_dentry; } else if ((rc != -EIO) && (rc != -EREMOTE) && - (rc != -EOPNOTSUPP)) /* path not found or net err */ + (rc != -EOPNOTSUPP) && (rc != -EINVAL)) goto cifs_create_out; /* else fallthrough to retry, using older open call, this is case where server does not support this SMB level, and @@ -609,7 +609,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ int oplock = 0; - int mode; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -660,13 +659,12 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if (pTcon->unix_ext) { if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && - (nd->flags & LOOKUP_OPEN)) { + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) { if (!((nd->intent.open.flags & O_CREAT) && (nd->intent.open.flags & O_EXCL))) { - mode = nd->intent.open.create_mode & - ~current_umask(); rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, mode, + parent_dir_inode->i_sb, + nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); /* @@ -681,6 +679,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, */ if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) posix_open = true; + else + pTcon->broken_posix_open = true; } } if (!posix_open) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 38c06f82657..302ea15f02e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -130,10 +130,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode, struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) { - file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (file->private_data == NULL) - return -ENOMEM; - pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); write_lock(&GlobalSMBSeslock); pCifsInode = CIFS_I(file->f_path.dentry->d_inode); @@ -184,6 +180,38 @@ psx_client_can_cache: return 0; } +static struct cifsFileInfo * +cifs_fill_filedata(struct file *file) +{ + struct list_head *tmp; + struct cifsFileInfo *pCifsFile = NULL; + struct cifsInodeInfo *pCifsInode = NULL; + + /* search inode for this file and fill in file->private_data */ + pCifsInode = CIFS_I(file->f_path.dentry->d_inode); + read_lock(&GlobalSMBSeslock); + list_for_each(tmp, &pCifsInode->openFileList) { + pCifsFile = list_entry(tmp, struct cifsFileInfo, flist); + if ((pCifsFile->pfile == NULL) && + (pCifsFile->pid == current->tgid)) { + /* mode set in cifs_create */ + + /* needed for writepage */ + pCifsFile->pfile = file; + file->private_data = pCifsFile; + break; + } + } + read_unlock(&GlobalSMBSeslock); + + if (file->private_data != NULL) { + return pCifsFile; + } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) + cERROR(1, ("could not find file instance for " + "new file %p", file)); + return NULL; +} + /* all arguments to this function must be checked for validity in caller */ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, @@ -258,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file) struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; struct cifsInodeInfo *pCifsInode; - struct list_head *tmp; char *full_path = NULL; int desiredAccess; int disposition; @@ -270,32 +297,12 @@ int cifs_open(struct inode *inode, struct file *file) cifs_sb = CIFS_SB(inode->i_sb); tcon = cifs_sb->tcon; - /* search inode for this file and fill in file->private_data */ pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &pCifsInode->openFileList) { - pCifsFile = list_entry(tmp, struct cifsFileInfo, - flist); - if ((pCifsFile->pfile == NULL) && - (pCifsFile->pid == current->tgid)) { - /* mode set in cifs_create */ - - /* needed for writepage */ - pCifsFile->pfile = file; - - file->private_data = pCifsFile; - break; - } - } - read_unlock(&GlobalSMBSeslock); - - if (file->private_data != NULL) { - rc = 0; + pCifsFile = cifs_fill_filedata(file); + if (pCifsFile) { FreeXid(xid); - return rc; - } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) - cERROR(1, ("could not find file instance for " - "new file %p", file)); + return 0; + } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -325,6 +332,7 @@ int cifs_open(struct inode *inode, struct file *file) /* no need for special case handling of setting mode on read only files needed here */ + pCifsFile = cifs_fill_filedata(file); cifs_posix_open_inode_helper(inode, file, pCifsInode, pCifsFile, oplock, netfid); goto out; -- cgit v1.2.3 From d5046853634a8d73f28bad3cf68d182c4a99035d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 22 May 2009 20:36:21 +0900 Subject: nilfs2: fix memory leak in nilfs_ioctl_clean_segments This fixes a new memory leak problem in garbage collection. The problem was brought by the bugfix patch ("nilfs2: fix lock order reversal in nilfs_clean_segments ioctl"). Thanks to Kentaro Suzuki for finding this problem. Reported-by: Kentaro Suzuki Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 50ff3f2cdf2..d6759b92006 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -576,7 +576,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); out_free: - while (--n > 0) + while (--n >= 0) vfree(kbufs[n]); kfree(kbufs[4]); return ret; -- cgit v1.2.3 From 8db14ca12569fe885694bd3d5ff84c2d973d3cb0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 23 May 2009 18:57:25 +0000 Subject: [CIFS] Avoid open on possible directories since Samba now rejects them Small change (mostly formatting) to limit lookup based open calls to file create only. After discussion yesteday on samba-technical about the posix lookup regression, and looking at a problem with cifs posix open to one particular Samba version, Jeff and JRA realized that Samba server's behavior changed in this area (posix open behavior on files vs. directories). To make this behavior consistent, JRA just made a fix to Samba server to alter how it handles open of directories (now returning the equivalent of EISDIR instead of success). Since we don't know at lookup time whether the inode is a directory or file (and thus whether posix open will succeed with most current Samba server), this change avoids the posix open code on lookup open (just issues posix open on creates). This gets the semantic benefits we want (atomicity, posix byte range locks, improved write semantics on newly created files) and file create still is fast, and we avoid the problem that Jeff noticed yesterday with "openat" (and some open directory calls) of non-cached directories to one version of Samba server, and will work with future Samba versions (which include the fix jra just pushed into Samba server). I confirmed this approach with jra yesterday and with Shirish today. Posix open is only called (at lookup time) for file create now. For opens (rather than creates), because we do not know if it is a file or directory yet, and current Samba no longer allows us to do posix open on dirs, we could end up wasting an open call on what turns out to be a dir. For file opens, we wait to call posix open till cifs_open. It could be added here (lookup) in the future but the performance tradeoff of the extra network request when EISDIR or EACCES is returned would have to be weighed against the 50% reduction in network traffic in the other paths. Reviewed-by: Shirish Pargaonkar Tested-by: Jeff Layton CC: Jeremy Allison Signed-off-by: Steve French --- fs/cifs/dir.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f49d684edd9..3758965d73d 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -657,31 +657,36 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); + /* Posix open is only called (at lookup time) for file create now. + * For opens (rather than creates), because we do not know if it + * is a file or directory yet, and current Samba no longer allows + * us to do posix open on dirs, we could end up wasting an open call + * on what turns out to be a dir. For file opens, we wait to call posix + * open till cifs_open. It could be added here (lookup) in the future + * but the performance tradeoff of the extra network request when EISDIR + * or EACCES is returned would have to be weighed against the 50% + * reduction in network traffic in the other paths. + */ if (pTcon->unix_ext) { if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && - (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) { - if (!((nd->intent.open.flags & O_CREAT) && - (nd->intent.open.flags & O_EXCL))) { - rc = cifs_posix_open(full_path, &newInode, + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && + (nd->intent.open.flags & O_CREAT)) { + rc = cifs_posix_open(full_path, &newInode, parent_dir_inode->i_sb, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); - /* - * This code works around a bug in - * samba posix open in samba versions 3.3.1 - * and earlier where create works - * but open fails with invalid parameter. - * If either of these error codes are - * returned, follow the normal lookup. - * Otherwise, the error during posix open - * is handled. - */ - if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) - posix_open = true; - else - pTcon->broken_posix_open = true; - } + /* + * The check below works around a bug in POSIX + * open in samba versions 3.3.1 and earlier where + * open could incorrectly fail with invalid parameter. + * If either that or op not supported returned, follow + * the normal lookup. + */ + if ((rc == 0) || (rc == -ENOENT)) + posix_open = true; + else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) + pTcon->broken_posix_open = true; } if (!posix_open) rc = cifs_get_inode_info_unix(&newInode, full_path, -- cgit v1.2.3 From d0367a508af9cf97beb202935bb9ad8883d30cd1 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 26 May 2009 14:51:00 -0400 Subject: nfs: fix build error in nfsroot with initconst fix build error with latest kbuild adjustments to initconst. The commit a447c0932445f92ce6f4c1bd020f62c5097a7842 ("vfs: Use const for kernel parser table") changed: static match_table_t __initdata tokens = { to static match_table_t __initconst tokens = { But the missing const causes popwerpc to fail with latest updates to __initconst like this: fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict The bug is only present with kbuild-next. Following patch has been build tested. Signed-off-by: Sam Ravnborg Cc: Steven Whitehouse Cc: Stephen Rothwell Acked-by: Jan Beulich Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index d9ef602fbc5..e3ed5908820 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -129,7 +129,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static const match_table_t tokens __initconst = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, -- cgit v1.2.3 From 95baa25c7321eb8613246acbf61b97911cc748d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 May 2009 14:51:00 -0400 Subject: NFSv4: Fix the case where NFSv4 renewal fails If the asynchronous lease renewal fails (usually due to a soft timeout), then we _must_ schedule state recovery in order to ensure that we don't lose the lease unnecessarily or, if the lease is already lost, that we recover the locking state promptly... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a4d24268029..4674f8092da 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { - switch (task->tk_status) { - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_CB_PATH_DOWN: - nfs4_schedule_state_recovery(clp); - } + /* Unless we're shutting down, schedule state recovery! */ + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + nfs4_schedule_state_recovery(clp); return; } spin_lock(&clp->cl_lock); -- cgit v1.2.3 From 348ca1029e8bae6e0c49097ad25439b17c5326f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 May 2009 15:46:50 +0100 Subject: FS-Cache: Fixup renamed filenames in comments in internal.h Fix up renamed filenames in comments in fs/fscache/internal.h. Originally, the files were all called fsc-xxx.c, but they got renamed to just xxx.c. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/fscache/internal.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index e0cbd16f6dc..1c341304621 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -28,7 +28,7 @@ #define FSCACHE_MAX_THREADS 32 /* - * fsc-cache.c + * cache.c */ extern struct list_head fscache_cache_list; extern struct rw_semaphore fscache_addremove_sem; @@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object( struct fscache_cookie *); /* - * fsc-cookie.c + * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; @@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *); extern void __fscache_cookie_put(struct fscache_cookie *); /* - * fsc-fsdef.c + * fsdef.c */ extern struct fscache_cookie fscache_fsdef_index; extern struct fscache_cookie_def fscache_fsdef_netfs_def; /* - * fsc-histogram.c + * histogram.c */ #ifdef CONFIG_FSCACHE_HISTOGRAM extern atomic_t fscache_obj_instantiate_histogram[HZ]; @@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops; #endif /* - * fsc-main.c + * main.c */ extern unsigned fscache_defer_lookup; extern unsigned fscache_defer_create; @@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *); extern int fscache_wait_bit_interruptible(void *); /* - * fsc-object.c + * object.c */ extern void fscache_withdrawing_object(struct fscache_cache *, struct fscache_object *); extern void fscache_enqueue_object(struct fscache_object *); /* - * fsc-operation.c + * operation.c */ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); @@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); /* - * fsc-proc.c + * proc.c */ #ifdef CONFIG_PROC_FS extern int __init fscache_proc_init(void); @@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void); #endif /* - * fsc-stats.c + * stats.c */ #ifdef CONFIG_FSCACHE_STATS extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; -- cgit v1.2.3 From 911e690e70540f009125bacd16c017eb1a7b1916 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 May 2009 15:46:55 +0100 Subject: CacheFiles: Fixup renamed filenames in comments in internal.h Fix up renamed filenames in comments in fs/cachefiles/internal.h. Originally, the files were all called cf-xxx.c, but they got renamed to just xxx.c. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/cachefiles/internal.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 19218e1463d..f7c255f9c62 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache) } /* - * cf-bind.c + * bind.c */ extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); /* - * cf-daemon.c + * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; @@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned fnr, unsigned bnr); /* - * cf-interface.c + * interface.c */ extern const struct fscache_cache_ops cachefiles_cache_ops; /* - * cf-key.c + * key.c */ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); /* - * cf-namei.c + * namei.c */ extern int cachefiles_delete_object(struct cachefiles_cache *cache, struct cachefiles_object *object); @@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); /* - * cf-proc.c + * proc.c */ #ifdef CONFIG_CACHEFILES_HISTOGRAM extern atomic_t cachefiles_lookup_histogram[HZ]; @@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) #endif /* - * cf-rdwr.c + * rdwr.c */ extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, struct page *, gfp_t); @@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *); extern void cachefiles_uncache_page(struct fscache_object *, struct page *); /* - * cf-security.c + * security.c */ extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, @@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache, } /* - * cf-xattr.c + * xattr.c */ extern int cachefiles_check_object_type(struct cachefiles_object *object); extern int cachefiles_set_object_xattr(struct cachefiles_object *object, -- cgit v1.2.3 From a0d24b295aed7a9daf4ca36bd4784e4d40f82303 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 19 May 2009 12:03:15 +0800 Subject: nfsd: fix hung up of nfs client while sync write data to nfs server Commit 'Short write in nfsd becomes a full write to the client' (31dec2538e45e9fff2007ea1f4c6bae9f78db724) broken the sync write. With the following commands to reproduce: $ mount -t nfs -o sync 192.168.0.21:/nfsroot /mnt $ cd /mnt $ echo aaaa > temp.txt Then nfs client is hung up. In SYNC mode the server alaways return the write count 0 to the client. This is because the value of host_err in nfsd_vfs_write() will be overwrite in SYNC mode by 'host_err=nfsd_sync(file);', and then we return host_err(which is now 0) as write count. This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6c68ffd6b4b..b660435978d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1015,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); if (host_err >= 0) { + *cnt = host_err; nfsdstats.io_write += host_err; fsnotify_modify(file->f_path.dentry); } @@ -1060,10 +1061,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, } dprintk("nfsd: write complete host_err=%d\n", host_err); - if (host_err >= 0) { + if (host_err >= 0) err = 0; - *cnt = host_err; - } else + else err = nfserrno(host_err); out: return err; -- cgit v1.2.3 From 086a377edc969aea6c761176a7e4ff68f264d6fe Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 May 2009 12:36:53 -0700 Subject: sysfs: file.c: use create_singlethread_workqueue() We don't need a kernel thread per CPU for this application. Acked-by: Alex Chiang Cc: Lai Jiangshan Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b1606e07b7a..561a9c050ce 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -723,7 +723,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), mutex_unlock(&sysfs_workq_mutex); if (sysfs_workqueue == NULL) { - sysfs_workqueue = create_workqueue("sysfsd"); + sysfs_workqueue = create_singlethread_workqueue("sysfsd"); if (sysfs_workqueue == NULL) { module_put(owner); return -ENOMEM; -- cgit v1.2.3 From 81e2962801bbb4e740c501ca687d5cb857929c04 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 28 May 2009 17:43:59 +0200 Subject: jffs2: Fix corruption when flash erase/write failure Erase errors such as: "Newly-erased block contained word 0xa4ef223e at offset 0x0296a014" and failure to write the clean marker, moves the offending erase block to erasing list before calling jffs2_erase_failed(). This is bad as jffs2_erase_failed() will also move the block to the bad_list, but is now moving the wrong block, causing FS corruption. Signed-off-by: Joakim Tjernlund Signed-off-by: David Woodhouse --- fs/jffs2/erase.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index c32b4a1ad6c..a0244740b75 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -480,13 +480,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb return; filebad: - mutex_lock(&c->erase_free_sem); - spin_lock(&c->erase_completion_lock); - /* Stick it on a list (any list) so erase_failed can take it - right off again. Silly, but shouldn't happen often. */ - list_move(&jeb->list, &c->erasing_list); - spin_unlock(&c->erase_completion_lock); - mutex_unlock(&c->erase_free_sem); jffs2_erase_failed(c, jeb, bad_offset); return; -- cgit v1.2.3 From bd6daba909d8484bd2ccf6017db4028d7a420927 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 28 May 2009 14:34:21 -0700 Subject: procfs: make errno values consistent when open pident vs exit(2) race occurs proc_pident_instantiate() has following call flow. proc_pident_lookup() proc_pident_instantiate() proc_pid_make_inode() And, proc_pident_lookup() has following error handling. const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); if (!task) goto out_no_task; Then, proc_pident_instantiate should return ENOENT too when racing against exit(2) occur. EINAL has two bad reason. - it implies caller is wrong. bad the race isn't caller's mistake. - man 2 open don't explain EINVAL. user often don't handle it. Note: Other proc_pid_make_inode() caller already use ENOENT properly. Acked-by: Eric W. Biederman Cc: Alexey Dobriyan Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index fb45615943c..3326bbf9ab9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1956,7 +1956,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-EINVAL); + struct dentry *error = ERR_PTR(-ENOENT); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) -- cgit v1.2.3 From c3dc5bec05a2ae03a72ef82e321d77fb549d951c Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Thu, 28 May 2009 14:34:31 -0700 Subject: flat: fix data sections alignment The flat loader uses an architecture's flat_stack_align() to align the stack but assumes word-alignment is enough for the data sections. However, on the Xtensa S6000 we have registers up to 128bit width which can be used from userspace and therefor need userspace stack and data-section alignment of at least this size. This patch drops flat_stack_align() and uses the same alignment that is required for slab caches, ARCH_SLAB_MINALIGN, or wordsize if it's not defined by the architecture. It also fixes m32r which was obviously kaput, aligning an uninitialized stack entry instead of the stack pointer. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Oskar Schirmer Cc: David Howells Cc: Russell King Cc: Bryan Wu Cc: Geert Uytterhoeven Acked-by: Paul Mundt Cc: Greg Ungerer Signed-off-by: Johannes Weiner Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_flat.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 5cebf0b3779..697f6b5f131 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -41,6 +41,7 @@ #include #include #include +#include /****************************************************************************/ @@ -54,6 +55,18 @@ #define DBG_FLT(a...) #endif +/* + * User data (stack, data section and bss) needs to be aligned + * for the same reasons as SLAB memory is, and to the same amount. + * Avoid duplicating architecture specific code by using the same + * macro as with SLAB allocation: + */ +#ifdef ARCH_SLAB_MINALIGN +#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN) +#else +#define FLAT_DATA_ALIGN (sizeof(void *)) +#endif + #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ @@ -114,20 +127,18 @@ static unsigned long create_flat_tables( int envc = bprm->envc; char uninitialized_var(dummy); - sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p); + sp = (unsigned long *)p; + sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); + sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN); + argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); + envp = argv + (argc + 1); - sp -= envc+1; - envp = sp; - sp -= argc+1; - argv = sp; - - flat_stack_align(sp); if (flat_argvp_envp_on_stack()) { - --sp; put_user((unsigned long) envp, sp); - --sp; put_user((unsigned long) argv, sp); + put_user((unsigned long) envp, sp + 2); + put_user((unsigned long) argv, sp + 1); } - put_user(argc,--sp); + put_user(argc, sp); current->mm->arg_start = (unsigned long) p; while (argc-->0) { put_user((unsigned long) p, argv++); @@ -558,7 +569,9 @@ static int load_flat_file(struct linux_binprm * bprm, ret = realdatastart; goto err; } - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); + datapos = ALIGN(realdatastart + + MAX_SHARED_LIBS * sizeof(unsigned long), + FLAT_DATA_ALIGN); DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n", (int)(data_len + bss_len + stack_len), (int)datapos); @@ -604,9 +617,12 @@ static int load_flat_file(struct linux_binprm * bprm, } realdatastart = textpos + ntohl(hdr->data_start); - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); - reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + - MAX_SHARED_LIBS * sizeof(unsigned long)); + datapos = ALIGN(realdatastart + + MAX_SHARED_LIBS * sizeof(unsigned long), + FLAT_DATA_ALIGN); + + reloc = (unsigned long *) + (datapos + (ntohl(hdr->reloc_start) - text_len)); memp = textpos; memp_size = len; #ifdef CONFIG_BINFMT_ZFLAT @@ -854,7 +870,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ - + stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); if (res > (unsigned long)-4096) -- cgit v1.2.3 From 62013ab5d5df297a01ae5863b5c26d758ec0af7f Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 30 May 2009 21:50:58 +0900 Subject: nilfs2: fix bh leak in nilfs_cpfile_delete_checkpoints function The nilfs_cpfile_delete_checkpoints() wrongly skips brelse() for the header block of checkpoint file in case of errors. This fixes the leak bug. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/cpfile.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index e90b60dfced..300f1cdfa86 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_sem; + goto out_header; /* skip hole */ ret = 0; continue; @@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; printk(KERN_ERR "%s: cannot delete block\n", __func__); - goto out_sem; + goto out_header; } } @@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nilfs_mdt_mark_dirty(cpfile); kunmap_atomic(kaddr, KM_USER0); } + + out_header: brelse(header_bh); out_sem: -- cgit v1.2.3