From 6d59e7f582ef1c1988542d0fc3b36d0087b757ce Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 15:48:17 -0400 Subject: [PATCH] move a bunch of declarations to fs/internal.h Signed-off-by: Al Viro --- fs/internal.h | 11 +++++++++++ fs/pnode.c | 1 + fs/super.c | 1 + 3 files changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/internal.h b/fs/internal.h index 392e8ccd6fc..80aa9a02337 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -43,3 +43,14 @@ extern void __init chrdev_init(void); * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); + +extern void free_vfsmnt(struct vfsmount *); +extern struct vfsmount *alloc_vfsmnt(const char *); +extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); +extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, + struct vfsmount *); +extern void release_mounts(struct list_head *); +extern void umount_tree(struct vfsmount *, int, struct list_head *); +extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); + +extern void __init mnt_init(void); diff --git a/fs/pnode.c b/fs/pnode.c index 1d8f5447f3f..a9e0d6fadbc 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -9,6 +9,7 @@ #include #include #include +#include "internal.h" #include "pnode.h" /* return the next shared peer mount of @p */ diff --git a/fs/super.c b/fs/super.c index 1f8f05ede43..4798350b2bc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -39,6 +39,7 @@ #include #include #include +#include "internal.h" LIST_HEAD(super_blocks); -- cgit v1.2.3 From 1a60a280778ff90270fc7390d9ec102f713a5a29 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 16:19:49 -0400 Subject: [PATCH] lock exclusively in collect_mounts() and drop_collected_mounts() Taking namespace_sem shared there isn't worth the trouble, especially with vfsmount ID allocation about to be added. That way we know that umount_tree(), copy_tree() and clone_mnt() are _always_ serialized by namespace_sem. 
umount_tree() still needs vfsmount_lock (it manipulates hash chains, among other things), but that's a separate story. Signed-off-by: Al Viro --- fs/namespace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 678f7ce060f..af2fb3707d0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1091,20 +1091,20 @@ Enomem: struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) { struct vfsmount *tree; - down_read(&namespace_sem); + down_write(&namespace_sem); tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); - up_read(&namespace_sem); + up_write(&namespace_sem); return tree; } void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); - down_read(&namespace_sem); + down_write(&namespace_sem); spin_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); spin_unlock(&vfsmount_lock); - up_read(&namespace_sem); + up_write(&namespace_sem); release_mounts(&umount_list); } -- cgit v1.2.3 From b5266eb4c8d1a2887a19aaec8144ee4ad1b054c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 17:48:24 -0400 Subject: [PATCH] switch a bunch of LSM hooks from nameidata to path Namely, ones from namespace.c Signed-off-by: Al Viro --- fs/namespace.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index af2fb3707d0..87d2d82010b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1220,7 +1220,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) if (IS_DEADDIR(nd->path.dentry->d_inode)) goto out_unlock; - err = security_sb_check_sb(mnt, nd); + err = security_sb_check_sb(mnt, &nd->path); if (err) goto out_unlock; @@ -1230,7 +1230,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) out_unlock: mutex_unlock(&nd->path.dentry->d_inode->i_mutex); if (!err) - security_sb_post_addmount(mnt, nd); + security_sb_post_addmount(mnt, &nd->path); return err; } 
@@ -1746,7 +1746,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (retval) return retval; - retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page); + retval = security_sb_mount(dev_name, &nd.path, + type_page, flags, data_page); if (retval) goto dput_out; @@ -2007,7 +2008,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, if (error) goto out1; - error = security_sb_pivotroot(&old_nd, &new_nd); + error = security_sb_pivotroot(&old_nd.path, &new_nd.path); if (error) { path_put(&old_nd.path); goto out1; @@ -2070,7 +2071,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); chroot_fs_refs(&user_nd.path, &new_nd.path); - security_sb_post_pivotroot(&user_nd, &new_nd); + security_sb_post_pivotroot(&user_nd.path, &new_nd.path); error = 0; path_put(&root_parent); path_put(&parent_path); -- cgit v1.2.3 From 8c3ee42e80ccead805806b3cb50b9855ceb957a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2008 18:00:39 -0400 Subject: [PATCH] get rid of more nameidata passing in namespace.c Further reduction of stack footprint (sys_pivot_root()); lose useless BKL in there, while we are at it. 
Signed-off-by: Al Viro --- fs/namespace.c | 53 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 87d2d82010b..1bf302d0478 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1205,32 +1205,32 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, return 0; } -static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) +static int graft_tree(struct vfsmount *mnt, struct path *path) { int err; if (mnt->mnt_sb->s_flags & MS_NOUSER) return -EINVAL; - if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != + if (S_ISDIR(path->dentry->d_inode->i_mode) != S_ISDIR(mnt->mnt_root->d_inode->i_mode)) return -ENOTDIR; err = -ENOENT; - mutex_lock(&nd->path.dentry->d_inode->i_mutex); - if (IS_DEADDIR(nd->path.dentry->d_inode)) + mutex_lock(&path->dentry->d_inode->i_mutex); + if (IS_DEADDIR(path->dentry->d_inode)) goto out_unlock; - err = security_sb_check_sb(mnt, &nd->path); + err = security_sb_check_sb(mnt, path); if (err) goto out_unlock; err = -ENOENT; - if (IS_ROOT(nd->path.dentry) || !d_unhashed(nd->path.dentry)) - err = attach_recursive_mnt(mnt, &nd->path, NULL); + if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) + err = attach_recursive_mnt(mnt, path, NULL); out_unlock: - mutex_unlock(&nd->path.dentry->d_inode->i_mutex); + mutex_unlock(&path->dentry->d_inode->i_mutex); if (!err) - security_sb_post_addmount(mnt, &nd->path); + security_sb_post_addmount(mnt, path); return err; } @@ -1294,7 +1294,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name, if (!mnt) goto out; - err = graft_tree(mnt, nd); + err = graft_tree(mnt, &nd->path); if (err) { LIST_HEAD(umount_list); spin_lock(&vfsmount_lock); @@ -1501,7 +1501,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, nd))) + if ((err = graft_tree(newmnt, &nd->path))) goto unlock; if 
(fslist) /* add to the specified expiration list */ @@ -1987,15 +1987,13 @@ asmlinkage long sys_pivot_root(const char __user * new_root, const char __user * put_old) { struct vfsmount *tmp; - struct nameidata new_nd, old_nd, user_nd; - struct path parent_path, root_parent; + struct nameidata new_nd, old_nd; + struct path parent_path, root_parent, root; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - lock_kernel(); - error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new_nd); if (error) @@ -2015,7 +2013,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, } read_lock(&current->fs->lock); - user_nd.path = current->fs->root; + root = current->fs->root; path_get(&current->fs->root); read_unlock(&current->fs->lock); down_write(&namespace_sem); @@ -2023,9 +2021,9 @@ asmlinkage long sys_pivot_root(const char __user * new_root, error = -EINVAL; if (IS_MNT_SHARED(old_nd.path.mnt) || IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || - IS_MNT_SHARED(user_nd.path.mnt->mnt_parent)) + IS_MNT_SHARED(root.mnt->mnt_parent)) goto out2; - if (!check_mnt(user_nd.path.mnt)) + if (!check_mnt(root.mnt)) goto out2; error = -ENOENT; if (IS_DEADDIR(new_nd.path.dentry->d_inode)) @@ -2035,13 +2033,13 @@ asmlinkage long sys_pivot_root(const char __user * new_root, if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) goto out2; error = -EBUSY; - if (new_nd.path.mnt == user_nd.path.mnt || - old_nd.path.mnt == user_nd.path.mnt) + if (new_nd.path.mnt == root.mnt || + old_nd.path.mnt == root.mnt) goto out2; /* loop, on the same file system */ error = -EINVAL; - if (user_nd.path.mnt->mnt_root != user_nd.path.dentry) + if (root.mnt->mnt_root != root.dentry) goto out2; /* not a mountpoint */ - if (user_nd.path.mnt->mnt_parent == user_nd.path.mnt) + if (root.mnt->mnt_parent == root.mnt) goto out2; /* not attached */ if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) goto out2; /* not a mountpoint */ @@ -2063,27 +2061,26 @@ asmlinkage long sys_pivot_root(const char __user 
* new_root, } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) goto out3; detach_mnt(new_nd.path.mnt, &parent_path); - detach_mnt(user_nd.path.mnt, &root_parent); + detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ - attach_mnt(user_nd.path.mnt, &old_nd.path); + attach_mnt(root.mnt, &old_nd.path); /* mount new_root on / */ attach_mnt(new_nd.path.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); - chroot_fs_refs(&user_nd.path, &new_nd.path); - security_sb_post_pivotroot(&user_nd.path, &new_nd.path); + chroot_fs_refs(&root, &new_nd.path); + security_sb_post_pivotroot(&root, &new_nd.path); error = 0; path_put(&root_parent); path_put(&parent_path); out2: mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); up_write(&namespace_sem); - path_put(&user_nd.path); + path_put(&root); path_put(&old_nd.path); out1: path_put(&new_nd.path); out0: - unlock_kernel(); return error; out3: spin_unlock(&vfsmount_lock); -- cgit v1.2.3 From 4e1b36fb485dd81b0818ef1bc8fb5c0f2923a283 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Mar 2008 00:16:03 -0400 Subject: [PATCH] umount_tree() will unhash everything itself Signed-off-by: Al Viro --- fs/pnode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/pnode.c b/fs/pnode.c index a9e0d6fadbc..f968e35d978 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -212,8 +212,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, out: spin_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { - child = list_entry(tmp_list.next, struct vfsmount, mnt_hash); - list_del_init(&child->mnt_hash); + child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } spin_unlock(&vfsmount_lock); -- cgit v1.2.3 From 521b5d0c40386f4a9805cdec7bd979fc96a86aeb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Mar 2008 00:46:41 -0400 Subject: [PATCH] teach seq_file to discard entries Allow 
->show() return SEQ_SKIP; that will discard all output from that element and move on. Signed-off-by: Al Viro --- fs/pnode.h | 1 + fs/seq_file.c | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/pnode.h b/fs/pnode.h index f249be2fee7..973c3f825e7 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -35,4 +35,5 @@ int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, struct list_head *); int propagate_umount(struct list_head *); int propagate_mount_busy(struct vfsmount *, int); +void mnt_release_group_id(struct vfsmount *); #endif /* _LINUX_PNODE_H */ diff --git a/fs/seq_file.c b/fs/seq_file.c index 853770274f2..bf2bcfd4bcf 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -25,6 +25,7 @@ * into the buffer. In case of error ->start() and ->next() return * ERR_PTR(error). In the end of sequence they return %NULL. ->show() * returns 0 in case of success and negative number in case of error. + * Returning SEQ_SKIP means "discard this element and move on". 
*/ int seq_open(struct file *file, const struct seq_operations *op) { @@ -114,8 +115,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (!p || IS_ERR(p)) break; err = m->op->show(m, p); - if (err) + if (err < 0) break; + if (unlikely(err)) + m->count = 0; if (m->count < m->size) goto Fill; m->op->stop(m, p); @@ -140,9 +143,10 @@ Fill: break; } err = m->op->show(m, p); - if (err || m->count == m->size) { + if (m->count == m->size || err) { m->count = offs; - break; + if (likely(err <= 0)) + break; } pos = next; } @@ -199,8 +203,12 @@ static int traverse(struct seq_file *m, loff_t offset) if (IS_ERR(p)) break; error = m->op->show(m, p); - if (error) + if (error < 0) break; + if (unlikely(error)) { + error = 0; + m->count = 0; + } if (m->count == m->size) goto Eoverflow; if (pos + m->count > offset) { -- cgit v1.2.3 From ed1524371716466e9c762808b02601d0d0276a92 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Apr 2008 19:51:27 -0400 Subject: [PATCH] double-free of inode on alloc_file() failure exit in create_write_pipe() Duh... Fortunately, the bug is quite recent (post-2.6.25) and, embarrassingly, mine ;-/ Signed-off-by: Al Viro --- fs/pipe.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 8be381bbcb5..f73492b6817 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -988,7 +988,10 @@ struct file *create_write_pipe(void) return f; err_dentry: + free_pipe_info(inode); dput(dentry); + return ERR_PTR(err); + err_inode: free_pipe_info(inode); iput(inode); -- cgit v1.2.3 From 9b4f526cdc0f95f635607dfba6ac788b3deca188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 22 Apr 2008 01:32:44 -0400 Subject: [PATCH] proc_readfd_common() race fix Since we drop the rcu_read_lock inside the loop, we can't assume that files->fdt will remain unchanged (and not freed) between iterations. 
Signed-off-by: Al Viro --- fs/proc/base.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 81d7d145292..7313c62e3e9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1626,7 +1626,6 @@ static int proc_readfd_common(struct file * filp, void * dirent, unsigned int fd, ino; int retval; struct files_struct * files; - struct fdtable *fdt; retval = -ENOENT; if (!p) @@ -1649,9 +1648,8 @@ static int proc_readfd_common(struct file * filp, void * dirent, if (!files) goto out; rcu_read_lock(); - fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < fdt->max_fds; + fd < files_fdtable(files)->max_fds; fd++, filp->f_pos++) { char name[PROC_NUMBUF]; int len; -- cgit v1.2.3