From d70b67c8bc72ee23b55381bd6a884f4796692f77 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 2 Jul 2008 21:30:15 +0200
Subject: [patch] vfs: fix lookup on deleted directory

Lookup can install a child dentry for a deleted directory.  This keeps
the directory dentry alive, and the inode pinned in the cache and on
disk, even after all external references have gone away.

This isn't a big problem normally, since memory pressure or umount
will clear out the directory dentry and its children, releasing the
inode.  But for UBIFS this causes problems because its orphan area can
overflow.

Fix this by returning ENOENT for all lookups on a S_DEAD directory
before creating a child dentry.

Thanks to Zoltan Sogor for noticing this while testing UBIFS, and
Artem for the excellent analysis of the problem and testing.

Reported-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 01e67dddcc3..3b26a240ade 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -519,7 +519,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 	 */
 	result = d_lookup(parent, name);
 	if (!result) {
-		struct dentry * dentry = d_alloc(parent, name);
+		struct dentry *dentry;
+
+		/* Don't create child dentry for a dead directory. */
+		result = ERR_PTR(-ENOENT);
+		if (IS_DEADDIR(dir))
+			goto out_unlock;
+
+		dentry = d_alloc(parent, name);
 		result = ERR_PTR(-ENOMEM);
 		if (dentry) {
 			result = dir->i_op->lookup(dir, dentry, nd);
@@ -528,6 +535,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
 			else
 				result = dentry;
 		}
+out_unlock:
 		mutex_unlock(&dir->i_mutex);
 		return result;
 	}
@@ -1317,7 +1325,14 @@ static struct dentry *__lookup_hash(struct qstr *name,
 
 	dentry = cached_lookup(base, name, nd);
 	if (!dentry) {
-		struct dentry *new = d_alloc(base, name);
+		struct dentry *new;
+
+		/* Don't create child dentry for a dead directory. */
+		dentry = ERR_PTR(-ENOENT);
+		if (IS_DEADDIR(inode))
+			goto out;
+
+		new = d_alloc(base, name);
 		dentry = ERR_PTR(-ENOMEM);
 		if (!new)
 			goto out;
-- 
cgit v1.2.3


From d2d9648ec6858e19d16a0b16da62534e85888653 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 1 Jul 2008 14:16:09 +0200
Subject: [PATCH] reuse xxx_fifo_fops for xxx_pipe_fops

Merge fifo and pipe file_operations.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fifo.c |  8 ++++----
 fs/pipe.c | 51 ++++++++-------------------------------------------
 2 files changed, 12 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/fifo.c b/fs/fifo.c
index 9785e36f81e..987bf941149 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
 	 *  opened, even when there is no process writing the FIFO.
 	 */
-		filp->f_op = &read_fifo_fops;
+		filp->f_op = &read_pipefifo_fops;
 		pipe->r_counter++;
 		if (pipe->readers++ == 0)
 			wake_up_partner(inode);
@@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 		if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
 			goto err;
 
-		filp->f_op = &write_fifo_fops;
+		filp->f_op = &write_pipefifo_fops;
 		pipe->w_counter++;
 		if (!pipe->writers++)
 			wake_up_partner(inode);
@@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	 *  This implementation will NEVER block on a O_RDWR open, since
 	 *  the process can at least talk to itself.
 	 */
-		filp->f_op = &rdwr_fifo_fops;
+		filp->f_op = &rdwr_pipefifo_fops;
 
 		pipe->readers++;
 		pipe->writers++;
@@ -151,5 +151,5 @@ err_nocleanup:
  * depending on the access mode of the file...
  */
 const struct file_operations def_fifo_fops = {
-	.open		= fifo_open,	/* will set read or write pipe_fops */
+	.open		= fifo_open,	/* will set read_ or write_pipefifo_fops */
 };
diff --git a/fs/pipe.c b/fs/pipe.c
index 10c4e9aa5c4..fcba6542b8d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
 /*
  * The file_operations structs are not static because they
  * are also used in linux/fs/fifo.c to do operations on FIFOs.
+ *
+ * Pipes reuse fifos' file_operations structs.
  */
-const struct file_operations read_fifo_fops = {
-	.llseek		= no_llseek,
-	.read		= do_sync_read,
-	.aio_read	= pipe_read,
-	.write		= bad_pipe_w,
-	.poll		= pipe_poll,
-	.unlocked_ioctl	= pipe_ioctl,
-	.open		= pipe_read_open,
-	.release	= pipe_read_release,
-	.fasync		= pipe_read_fasync,
-};
-
-const struct file_operations write_fifo_fops = {
-	.llseek		= no_llseek,
-	.read		= bad_pipe_r,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
-	.poll		= pipe_poll,
-	.unlocked_ioctl	= pipe_ioctl,
-	.open		= pipe_write_open,
-	.release	= pipe_write_release,
-	.fasync		= pipe_write_fasync,
-};
-
-const struct file_operations rdwr_fifo_fops = {
-	.llseek		= no_llseek,
-	.read		= do_sync_read,
-	.aio_read	= pipe_read,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
-	.poll		= pipe_poll,
-	.unlocked_ioctl	= pipe_ioctl,
-	.open		= pipe_rdwr_open,
-	.release	= pipe_rdwr_release,
-	.fasync		= pipe_rdwr_fasync,
-};
-
-static const struct file_operations read_pipe_fops = {
+const struct file_operations read_pipefifo_fops = {
 	.llseek		= no_llseek,
 	.read		= do_sync_read,
 	.aio_read	= pipe_read,
@@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = {
 	.fasync		= pipe_read_fasync,
 };
 
-static const struct file_operations write_pipe_fops = {
+const struct file_operations write_pipefifo_fops = {
 	.llseek		= no_llseek,
 	.read		= bad_pipe_r,
 	.write		= do_sync_write,
@@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = {
 	.fasync		= pipe_write_fasync,
 };
 
-static const struct file_operations rdwr_pipe_fops = {
+const struct file_operations rdwr_pipefifo_fops = {
 	.llseek		= no_llseek,
 	.read		= do_sync_read,
 	.aio_read	= pipe_read,
@@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void)
 	inode->i_pipe = pipe;
 
 	pipe->readers = pipe->writers = 1;
-	inode->i_fop = &rdwr_pipe_fops;
+	inode->i_fop = &rdwr_pipefifo_fops;
 
 	/*
 	 * Mark the inode dirty from the very beginning,
@@ -978,7 +943,7 @@ struct file *create_write_pipe(int flags)
 	d_instantiate(dentry, inode);
 
 	err = -ENFILE;
-	f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops);
+	f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
 	if (!f)
 		goto err_dentry;
 	f->f_mapping = inode->i_mapping;
@@ -1020,7 +985,7 @@ struct file *create_read_pipe(struct file *wrf, int flags)
 
 	f->f_pos = 0;
 	f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	f->f_op = &read_pipe_fops;
+	f->f_op = &read_pipefifo_fops;
 	f->f_mode = FMODE_READ;
 	f->f_version = 0;
 
-- 
cgit v1.2.3


From 7ac6cd653d7c31ad6b7bb5b88c549c4ebf628c34 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Jul 2008 23:07:54 +0200
Subject: [patch] hppfs: remove hppfs_permission

hppfs_permission() is equivalent to the '.permission == NULL' case.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hppfs/hppfs.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 65077aa90f0..2b3d1828db9 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -655,20 +655,13 @@ static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
 }
 
-int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
-	return generic_permission(inode, mask, NULL);
-}
-
 static const struct inode_operations hppfs_dir_iops = {
 	.lookup		= hppfs_lookup,
-	.permission	= hppfs_permission,
 };
 
 static const struct inode_operations hppfs_link_iops = {
 	.readlink	= hppfs_readlink,
 	.follow_link	= hppfs_follow_link,
-	.permission	= hppfs_permission,
 };
 
 static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
-- 
cgit v1.2.3


From 9043476f726802f4b00c96d0c4f418dde48d1304 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 15 Jul 2008 08:54:06 -0400
Subject: [PATCH] sanitize proc_sysctl

* keep references to ctl_table_head and ctl_table in /proc/sys inodes
* grab the former during operations, use the latter for access to
  entry if that succeeds
* have ->d_compare() check if table should be seen for one who does lookup;
  that allows us to avoid flipping inodes - if we have the same name resolve
  to different things, we'll just keep several dentries and ->d_compare()
  will reject the wrong ones.
* have ->lookup() and ->readdir() scan the table of our inode first, then
  walk all ctl_table_header and scan ->attached_by for those that are
  attached to our directory.
* implement ->getattr().
* get rid of insane amounts of tree-walking
* get rid of the need to know dentry in ->permission() and of the contortions
  induced by that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/inode.c       |   5 +
 fs/proc/proc_sysctl.c | 427 +++++++++++++++++++++++---------------------------
 2 files changed, 197 insertions(+), 235 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b37f25dc45a..8bb03f056c2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/smp_lock.h>
+#include <linux/sysctl.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
 			module_put(de->owner);
 		de_put(de);
 	}
+	if (PROC_I(inode)->sysctl)
+		sysctl_head_put(PROC_I(inode)->sysctl);
 	clear_inode(inode);
 }
 
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	ei->fd = 0;
 	ei->op.proc_get_link = NULL;
 	ei->pde = NULL;
+	ei->sysctl = NULL;
+	ei->sysctl_entry = NULL;
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	return inode;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f..fa1ec2433e4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
 static struct dentry_operations proc_sys_dentry_operations;
 static const struct file_operations proc_sys_file_operations;
 static const struct inode_operations proc_sys_inode_operations;
+static const struct file_operations proc_sys_dir_file_operations;
+static const struct inode_operations proc_sys_dir_operations;
 
-static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
-{
-	/* Refresh the cached information bits in the inode */
-	if (table) {
-		inode->i_uid = 0;
-		inode->i_gid = 0;
-		inode->i_mode = table->mode;
-		if (table->proc_handler) {
-			inode->i_mode |= S_IFREG;
-			inode->i_nlink = 1;
-		} else {
-			inode->i_mode |= S_IFDIR;
-			inode->i_nlink = 0;	/* It is too hard to figure out */
-		}
-	}
-}
-
-static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
+static struct inode *proc_sys_make_inode(struct super_block *sb,
+		struct ctl_table_header *head, struct ctl_table *table)
 {
 	struct inode *inode;
-	struct proc_inode *dir_ei, *ei;
-	int depth;
+	struct proc_inode *ei;
 
-	inode = new_inode(dir->i_sb);
+	inode = new_inode(sb);
 	if (!inode)
 		goto out;
 
-	/* A directory is always one deeper than it's parent */
-	dir_ei = PROC_I(dir);
-	depth = dir_ei->fd + 1;
-
+	sysctl_head_get(head);
 	ei = PROC_I(inode);
-	ei->fd = depth;
+	ei->sysctl = head;
+	ei->sysctl_entry = table;
+
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_op = &proc_sys_inode_operations;
-	inode->i_fop = &proc_sys_file_operations;
 	inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
-	proc_sys_refresh_inode(inode, table);
+	inode->i_mode = table->mode;
+	if (!table->child) {
+		inode->i_mode |= S_IFREG;
+		inode->i_op = &proc_sys_inode_operations;
+		inode->i_fop = &proc_sys_file_operations;
+	} else {
+		inode->i_mode |= S_IFDIR;
+		inode->i_nlink = 0;
+		inode->i_op = &proc_sys_dir_operations;
+		inode->i_fop = &proc_sys_dir_file_operations;
+	}
 out:
 	return inode;
 }
 
-static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
-{
-	for (;;) {
-		struct proc_inode *ei;
-
-		ei = PROC_I(dentry->d_inode);
-		if (ei->fd == depth)
-			break; /* found */
-
-		dentry = dentry->d_parent;
-	}
-	return dentry;
-}
-
-static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
-							struct qstr *name)
+static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
 {
 	int len;
-	for ( ; table->ctl_name || table->procname; table++) {
+	for ( ; p->ctl_name || p->procname; p++) {
 
-		if (!table->procname)
+		if (!p->procname)
 			continue;
 
-		len = strlen(table->procname);
+		len = strlen(p->procname);
 		if (len != name->len)
 			continue;
 
-		if (memcmp(table->procname, name->name, len) != 0)
+		if (memcmp(p->procname, name->name, len) != 0)
 			continue;
 
 		/* I have a match */
-		return table;
+		return p;
 	}
 	return NULL;
 }
 
-static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
-						struct ctl_table *table)
+struct ctl_table_header *grab_header(struct inode *inode)
 {
-	struct dentry *ancestor;
-	struct proc_inode *ei;
-	int depth, i;
+	if (PROC_I(inode)->sysctl)
+		return sysctl_head_grab(PROC_I(inode)->sysctl);
+	else
+		return sysctl_head_next(NULL);
+}
 
-	ei = PROC_I(dentry->d_inode);
-	depth = ei->fd;
+static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
+					struct nameidata *nd)
+{
+	struct ctl_table_header *head = grab_header(dir);
+	struct ctl_table *table = PROC_I(dir)->sysctl_entry;
+	struct ctl_table_header *h = NULL;
+	struct qstr *name = &dentry->d_name;
+	struct ctl_table *p;
+	struct inode *inode;
+	struct dentry *err = ERR_PTR(-ENOENT);
 
-	if (depth == 0)
-		return table;
+	if (IS_ERR(head))
+		return ERR_CAST(head);
 
-	for (i = 1; table && (i <= depth); i++) {
-		ancestor = proc_sys_ancestor(dentry, i);
-		table = proc_sys_lookup_table_one(table, &ancestor->d_name);
-		if (table)
-			table = table->child;
+	if (table && !table->child) {
+		WARN_ON(1);
+		goto out;
 	}
-	return table;
-
-}
-static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
-						struct qstr *name,
-						struct ctl_table *table)
-{
-	table = proc_sys_lookup_table(dparent, table);
-	if (table)
-		table = proc_sys_lookup_table_one(table, name);
-	return table;
-}
 
-static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
-						struct qstr *name,
-						struct ctl_table_header **ptr)
-{
-	struct ctl_table_header *head;
-	struct ctl_table *table = NULL;
+	table = table ? table->child : head->ctl_table;
 
-	for (head = sysctl_head_next(NULL); head;
-			head = sysctl_head_next(head)) {
-		table = proc_sys_lookup_entry(parent, name, head->ctl_table);
-		if (table)
-			break;
+	p = find_in_table(table, name);
+	if (!p) {
+		for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
+			if (h->attached_to != table)
+				continue;
+			p = find_in_table(h->attached_by, name);
+			if (p)
+				break;
+		}
 	}
-	*ptr = head;
-	return table;
-}
-
-static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
-					struct nameidata *nd)
-{
-	struct ctl_table_header *head;
-	struct inode *inode;
-	struct dentry *err;
-	struct ctl_table *table;
 
-	err = ERR_PTR(-ENOENT);
-	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
-	if (!table)
+	if (!p)
 		goto out;
 
 	err = ERR_PTR(-ENOMEM);
-	inode = proc_sys_make_inode(dir, table);
+	inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
+	if (h)
+		sysctl_head_finish(h);
+
 	if (!inode)
 		goto out;
 
@@ -168,22 +129,14 @@ out:
 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 		size_t count, loff_t *ppos, int write)
 {
-	struct dentry *dentry = filp->f_dentry;
-	struct ctl_table_header *head;
-	struct ctl_table *table;
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct ctl_table_header *head = grab_header(inode);
+	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 	ssize_t error;
 	size_t res;
 
-	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
-	/* Has the sysctl entry disappeared on us? */
-	error = -ENOENT;
-	if (!table)
-		goto out;
-
-	/* Has the sysctl entry been replaced by a directory? */
-	error = -EISDIR;
-	if (!table->proc_handler)
-		goto out;
+	if (IS_ERR(head))
+		return PTR_ERR(head);
 
 	/*
 	 * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
 		goto out;
 
+	/* if that can happen at all, it should be -EINVAL, not -EISDIR */
+	error = -EINVAL;
+	if (!table->proc_handler)
+		goto out;
+
 	/* careful: calling conventions are nasty here */
 	res = count;
 	error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
 
 
 static int proc_sys_fill_cache(struct file *filp, void *dirent,
-				filldir_t filldir, struct ctl_table *table)
+				filldir_t filldir,
+				struct ctl_table_header *head,
+				struct ctl_table *table)
 {
-	struct ctl_table_header *head;
-	struct ctl_table *child_table = NULL;
 	struct dentry *child, *dir = filp->f_path.dentry;
 	struct inode *inode;
 	struct qstr qname;
 	ino_t ino = 0;
 	unsigned type = DT_UNKNOWN;
-	int ret;
 
 	qname.name = table->procname;
 	qname.len  = strlen(table->procname);
 	qname.hash = full_name_hash(qname.name, qname.len);
 
-	/* Suppress duplicates.
-	 * Only fill a directory entry if it is the value that
-	 * an ordinary lookup of that name returns.  Hide all
-	 * others.
-	 *
-	 * If we ever cache this translation in the dcache
-	 * I should do a dcache lookup first.  But for now
-	 * it is just simpler not to.
-	 */
-	ret = 0;
-	child_table = do_proc_sys_lookup(dir, &qname, &head);
-	sysctl_head_finish(head);
-	if (child_table != table)
-		return 0;
-
 	child = d_lookup(dir, &qname);
 	if (!child) {
-		struct dentry *new;
-		new = d_alloc(dir, &qname);
-		if (new) {
-			inode = proc_sys_make_inode(dir->d_inode, table);
-			if (!inode)
-				child = ERR_PTR(-ENOMEM);
-			else {
-				new->d_op = &proc_sys_dentry_operations;
-				d_add(new, inode);
+		child = d_alloc(dir, &qname);
+		if (child) {
+			inode = proc_sys_make_inode(dir->d_sb, head, table);
+			if (!inode) {
+				dput(child);
+				return -ENOMEM;
+			} else {
+				child->d_op = &proc_sys_dentry_operations;
+				d_add(child, inode);
 			}
-			if (child)
-				dput(new);
-			else
-				child = new;
+		} else {
+			return -ENOMEM;
 		}
 	}
-	if (!child || IS_ERR(child) || !child->d_inode)
-		goto end_instantiate;
 	inode = child->d_inode;
-	if (inode) {
-		ino  = inode->i_ino;
-		type = inode->i_mode >> 12;
-	}
+	ino  = inode->i_ino;
+	type = inode->i_mode >> 12;
 	dput(child);
-end_instantiate:
-	if (!ino)
-		ino= find_inode_number(dir, &qname);
-	if (!ino)
-		ino = 1;
-	return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+	return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+}
+
+static int scan(struct ctl_table_header *head, ctl_table *table,
+		unsigned long *pos, struct file *file,
+		void *dirent, filldir_t filldir)
+{
+
+	for (; table->ctl_name || table->procname; table++, (*pos)++) {
+		int res;
+
+		/* Can't do anything without a proc name */
+		if (!table->procname)
+			continue;
+
+		if (*pos < file->f_pos)
+			continue;
+
+		res = proc_sys_fill_cache(file, dirent, filldir, head, table);
+		if (res)
+			return res;
+
+		file->f_pos = *pos + 1;
+	}
+	return 0;
 }
 
 static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct dentry *dentry = filp->f_dentry;
+	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	struct ctl_table_header *head = NULL;
-	struct ctl_table *table;
+	struct ctl_table_header *head = grab_header(inode);
+	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+	struct ctl_table_header *h = NULL;
 	unsigned long pos;
-	int ret;
+	int ret = -EINVAL;
+
+	if (IS_ERR(head))
+		return PTR_ERR(head);
 
-	ret = -ENOTDIR;
-	if (!S_ISDIR(inode->i_mode))
+	if (table && !table->child) {
+		WARN_ON(1);
 		goto out;
+	}
+
+	table = table ? table->child : head->ctl_table;
 
 	ret = 0;
 	/* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	}
 	pos = 2;
 
-	/* - Find each instance of the directory
-	 * - Read all entries in each instance
-	 * - Before returning an entry to user space lookup the entry
-	 *   by name and if I find a different entry don't return
-	 *   this one because it means it is a buried dup.
-	 * For sysctl this should only happen for directory entries.
-	 */
-	for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
-		table = proc_sys_lookup_table(dentry, head->ctl_table);
+	ret = scan(head, table, &pos, filp, dirent, filldir);
+	if (ret)
+		goto out;
 
-		if (!table)
+	for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
+		if (h->attached_to != table)
 			continue;
-
-		for (; table->ctl_name || table->procname; table++, pos++) {
-			/* Can't do anything without a proc name */
-			if (!table->procname)
-				continue;
-
-			if (pos < filp->f_pos)
-				continue;
-
-			if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
-				goto out;
-			filp->f_pos = pos + 1;
+		ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
+		if (ret) {
+			sysctl_head_finish(h);
+			break;
 		}
 	}
 	ret = 1;
@@ -349,47 +298,18 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
 	 * sysctl entries that are not writeable,
 	 * are _NOT_ writeable, capabilities or not.
 	 */
-	struct ctl_table_header *head;
-	struct ctl_table *table;
-	struct dentry *dentry;
-	int mode;
-	int depth;
+	struct ctl_table_header *head = grab_header(inode);
+	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 	int error;
 
-	head = NULL;
-	depth = PROC_I(inode)->fd;
-
-	/* First check the cached permissions, in case we don't have
-	 * enough information to lookup the sysctl table entry.
-	 */
-	error = -EACCES;
-	mode = inode->i_mode;
-
-	if (current->euid == 0)
-		mode >>= 6;
-	else if (in_group_p(0))
-		mode >>= 3;
-
-	if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
-		error = 0;
-
-	/* If we can't get a sysctl table entry the permission
-	 * checks on the cached mode will have to be enough.
-	 */
-	if (!nd || !depth)
-		goto out;
+	if (IS_ERR(head))
+		return PTR_ERR(head);
 
-	dentry = nd->path.dentry;
-	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+	if (!table) /* global root - r-xr-xr-x */
+		error = mask & MAY_WRITE ? -EACCES : 0;
+	else /* Use the permissions on the sysctl table entry */
+		error = sysctl_perm(head->root, table, mask);
 
-	/* If the entry does not exist deny permission */
-	error = -EACCES;
-	if (!table)
-		goto out;
-
-	/* Use the permissions on the sysctl table entry */
-	error = sysctl_perm(head->root, table, mask);
-out:
 	sysctl_head_finish(head);
 	return error;
 }
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 	return error;
 }
 
-/* I'm lazy and don't distinguish between files and directories,
- * until access time.
- */
+static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct ctl_table_header *head = grab_header(inode);
+	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+
+	if (IS_ERR(head))
+		return PTR_ERR(head);
+
+	generic_fillattr(inode, stat);
+	if (table)
+		stat->mode = (stat->mode & S_IFMT) | table->mode;
+
+	sysctl_head_finish(head);
+	return 0;
+}
+
 static const struct file_operations proc_sys_file_operations = {
 	.read		= proc_sys_read,
 	.write		= proc_sys_write,
+};
+
+static const struct file_operations proc_sys_dir_file_operations = {
 	.readdir	= proc_sys_readdir,
 };
 
 static const struct inode_operations proc_sys_inode_operations = {
+	.permission	= proc_sys_permission,
+	.setattr	= proc_sys_setattr,
+	.getattr	= proc_sys_getattr,
+};
+
+static const struct inode_operations proc_sys_dir_operations = {
 	.lookup		= proc_sys_lookup,
 	.permission	= proc_sys_permission,
 	.setattr	= proc_sys_setattr,
+	.getattr	= proc_sys_getattr,
 };
 
 static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct ctl_table_header *head;
-	struct ctl_table *table;
-	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
-	proc_sys_refresh_inode(dentry->d_inode, table);
-	sysctl_head_finish(head);
-	return !!table;
+	return !PROC_I(dentry->d_inode)->sysctl->unregistering;
+}
+
+static int proc_sys_delete(struct dentry *dentry)
+{
+	return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
+}
+
+static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
+			    struct qstr *name)
+{
+	struct dentry *dentry = container_of(qstr, struct dentry, d_name);
+	if (qstr->len != name->len)
+		return 1;
+	if (memcmp(qstr->name, name->name, name->len))
+		return 1;
+	return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
 }
 
 static struct dentry_operations proc_sys_dentry_operations = {
 	.d_revalidate	= proc_sys_revalidate,
+	.d_delete	= proc_sys_delete,
+	.d_compare	= proc_sys_compare,
 };
 
 static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
 int proc_sys_init(void)
 {
 	proc_sys_root = proc_mkdir("sys", NULL);
-	proc_sys_root->proc_iops = &proc_sys_inode_operations;
-	proc_sys_root->proc_fops = &proc_sys_file_operations;
+	proc_sys_root->proc_iops = &proc_sys_dir_operations;
+	proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
 	proc_sys_root->nlink = 0;
 	return 0;
 }
-- 
cgit v1.2.3


From 1bd5191d9f5d1928c4efdf604c4164b04bb88dbe Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 21 May 2008 19:15:03 +0200
Subject: [patch 05/14] hpfs: dont call permission()

hpfs_unlink() calls permission() prior to truncating the file.  HPFS
doesn't define a .permission method, so replace with explicit call to
generic_permission().

This is equivalent, except that devcgroup_inode_permission() and
security_inode_permission() are not called.

The truncation is just an implementation detail of the unlink, so
these security checks are unnecessary.

I suspect that even calling generic_permission() is unnecessary, since
we shouldn't mind if the file isn't writable.  But I leave that to the
maintainer to decide.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
---
 fs/hpfs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d256559b410..d9c59a77544 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -415,7 +415,7 @@ again:
 		d_drop(dentry);
 		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count) > 1 ||
-		    permission(inode, MAY_WRITE, NULL) ||
+		    generic_permission(inode, MAY_WRITE, NULL) ||
 		    !S_ISREG(inode->i_mode) ||
 		    get_write_access(inode)) {
 			spin_unlock(&dentry->d_lock);
-- 
cgit v1.2.3


From e6305c43eda10ebfd2ad9e35d6e172ccc7bb3695 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 15 Jul 2008 21:03:57 -0400
Subject: [PATCH] sanitize ->permission() prototype

* kill nameidata * argument; map the 3 bits in ->flags anybody cares
  about to new MAY_... ones and pass with the mask.
* kill redundant gfs2_iop_permission()
* sanitize ecryptfs_permission()
* fix remaining places where ->permission() instances might barf on new
  MAY_... found in mask.

The obvious next target in that direction is permission(9)

folded fix for nfs_permission() breakage from Miklos Szeredi <mszeredi@suse.cz>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/internal.h           |  4 +---
 fs/afs/security.c           |  2 +-
 fs/bad_inode.c              |  3 +--
 fs/cifs/cifsfs.c            |  2 +-
 fs/coda/dir.c               |  4 +++-
 fs/coda/pioctl.c            |  6 ++----
 fs/ecryptfs/inode.c         | 17 ++---------------
 fs/ext2/acl.c               |  2 +-
 fs/ext2/acl.h               |  2 +-
 fs/ext3/acl.c               |  2 +-
 fs/ext3/acl.h               |  2 +-
 fs/ext4/acl.c               |  2 +-
 fs/ext4/acl.h               |  2 +-
 fs/fuse/dir.c               |  6 +++---
 fs/gfs2/ops_inode.c         | 12 +++---------
 fs/hfs/inode.c              |  3 +--
 fs/hfsplus/inode.c          |  2 +-
 fs/hostfs/hostfs_kern.c     |  2 +-
 fs/jffs2/acl.c              |  2 +-
 fs/jffs2/acl.h              |  2 +-
 fs/jfs/acl.c                |  2 +-
 fs/jfs/jfs_acl.h            |  2 +-
 fs/namei.c                  | 23 +++++++++++++++++------
 fs/nfs/dir.c                | 11 +++++------
 fs/ocfs2/file.c             |  2 +-
 fs/ocfs2/file.h             |  3 +--
 fs/proc/base.c              |  3 +--
 fs/proc/proc_sysctl.c       |  2 +-
 fs/reiserfs/xattr.c         |  2 +-
 fs/smbfs/file.c             |  4 ++--
 fs/xfs/linux-2.6/xfs_iops.c |  3 +--
 31 files changed, 60 insertions(+), 76 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7102824ba84..3cb6920ff30 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *);
 extern const struct inode_operations afs_dir_inode_operations;
 extern const struct file_operations afs_dir_file_operations;
 
-extern int afs_permission(struct inode *, int, struct nameidata *);
-
 /*
  * file.c
  */
@@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *);
 extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int, struct nameidata *);
+extern int afs_permission(struct inode *, int);
 
 /*
  * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 3bcbeceba1b..3ef50437003 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
  * - AFS ACLs are attached to directories only, and a file is controlled by its
  *   parent directory's ACL
  */
-int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int afs_permission(struct inode *inode, int mask)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_access_t uninitialized_var(access);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f1c2ea8342f..5f1538c03b1 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
 	return -EIO;
 }
 
-static int bad_inode_permission(struct inode *inode, int mask,
-			struct nameidata *nd)
+static int bad_inode_permission(struct inode *inode, int mask)
 {
 	return -EIO;
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index fe5f6809cba..1ec7076f7b2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int cifs_permission(struct inode *inode, int mask)
 {
 	struct cifs_sb_info *cifs_sb;
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 3d2580e00a3..c5916228243 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -137,9 +137,11 @@ exit:
 }
 
 
-int coda_permission(struct inode *inode, int mask, struct nameidata *nd)
+int coda_permission(struct inode *inode, int mask)
 {
         int error = 0;
+
+	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
  
 	if (!mask)
 		return 0; 
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c21a1f552a6..c38a98974fb 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,8 +24,7 @@
 #include <linux/coda_psdev.h>
 
 /* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask,
-				 struct nameidata *nd);
+static int coda_ioctl_permission(struct inode *inode, int mask);
 static int coda_pioctl(struct inode * inode, struct file * filp, 
                        unsigned int cmd, unsigned long user_data);
 
@@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = {
 };
 
 /* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask,
-				 struct nameidata *nd)
+static int coda_ioctl_permission(struct inode *inode, int mask)
 {
         return 0;
 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index d755455e3bf..32f4228efcd 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -830,22 +830,9 @@ out:
 }
 
 static int
-ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+ecryptfs_permission(struct inode *inode, int mask)
 {
-	int rc;
-
-        if (nd) {
-		struct vfsmount *vfsmnt_save = nd->path.mnt;
-		struct dentry *dentry_save = nd->path.dentry;
-
-		nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry);
-		nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry);
-		rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
-		nd->path.mnt = vfsmnt_save;
-		nd->path.dentry = dentry_save;
-        } else
-		rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
-        return rc;
+	return permission(ecryptfs_inode_to_lower(inode), mask, NULL);
 }
 
 /**
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index e58669e1b87..ae8c4f850b2 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask)
 }
 
 int
-ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext2_permission(struct inode *inode, int mask)
 {
 	return generic_permission(inode, mask, ext2_check_acl);
 }
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 0bde85bafe3..b42cf578554 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size)
 #define EXT2_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext2_permission (struct inode *, int, struct nameidata *);
+extern int ext2_permission (struct inode *, int);
 extern int ext2_acl_chmod (struct inode *);
 extern int ext2_init_acl (struct inode *, struct inode *);
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a754d184817..b60bb241880 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask)
 }
 
 int
-ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext3_permission(struct inode *inode, int mask)
 {
 	return generic_permission(inode, mask, ext3_check_acl);
 }
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 0d1e6279cbf..42da16b8cac 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size)
 #define EXT3_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_permission (struct inode *, int);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
 
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3c8dab880d9..c7d04e16544 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask)
 }
 
 int
-ext4_permission(struct inode *inode, int mask, struct nameidata *nd)
+ext4_permission(struct inode *inode, int mask)
 {
 	return generic_permission(inode, mask, ext4_check_acl);
 }
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 26a5c1abf14..cd2b855a07d 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size)
 #define EXT4_ACL_NOT_CACHED ((void *)-1)
 
 /* acl.c */
-extern int ext4_permission (struct inode *, int, struct nameidata *);
+extern int ext4_permission (struct inode *, int);
 extern int ext4_acl_chmod (struct inode *);
 extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 51d0035ff07..48a7934cb95 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -898,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask)
 		return PTR_ERR(req);
 
 	memset(&inarg, 0, sizeof(inarg));
-	inarg.mask = mask;
+	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
 	req->in.h.opcode = FUSE_ACCESS;
 	req->in.h.nodeid = get_node_id(inode);
 	req->in.numargs = 1;
@@ -927,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask)
  * access request is sent.  Execute permission is still checked
  * locally based on file mode.
  */
-static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int fuse_permission(struct inode *inode, int mask)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	bool refreshed = false;
@@ -962,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
 		   exist.  So if permissions are revoked this won't be
 		   noticed immediately, only after the attribute
 		   timeout has expired */
-	} else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) {
+	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
 		err = fuse_access(inode, mask);
 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1e252dfc529..4e982532f08 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask)
 	return error;
 }
 
-static int gfs2_iop_permission(struct inode *inode, int mask,
-			       struct nameidata *nd)
-{
-	return gfs2_permission(inode, mask);
-}
-
 static int setattr_size(struct inode *inode, struct iattr *attr)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 }
 
 const struct inode_operations gfs2_file_iops = {
-	.permission = gfs2_iop_permission,
+	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
 	.setxattr = gfs2_setxattr,
@@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = {
 	.rmdir = gfs2_rmdir,
 	.mknod = gfs2_mknod,
 	.rename = gfs2_rename,
-	.permission = gfs2_iop_permission,
+	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
 	.setxattr = gfs2_setxattr,
@@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = {
 const struct inode_operations gfs2_symlink_iops = {
 	.readlink = gfs2_readlink,
 	.follow_link = gfs2_follow_link,
-	.permission = gfs2_iop_permission,
+	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
 	.setxattr = gfs2_setxattr,
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dc4ec640e87..aa73f3fd5dd 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode)
 	}
 }
 
-static int hfs_permission(struct inode *inode, int mask,
-			  struct nameidata *nd)
+static int hfs_permission(struct inode *inode, int mask)
 {
 	if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
 		return 0;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index cc3b5e24339..d4014e3044d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
 	perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
 }
 
-static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int hfsplus_permission(struct inode *inode, int mask)
 {
 	/* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
 	 * open_exec has the same test, so it's still not executable, if a x bit
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5222345ddcc..d6ecabf4d23 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	return err;
 }
 
-int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
+int hostfs_permission(struct inode *ino, int desired)
 {
 	char *name;
 	int r = 0, w = 0, x = 0, err;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 4c80404a9ab..d98713777a1 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
-int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jffs2_permission(struct inode *inode, int mask)
 {
 	return generic_permission(inode, mask, jffs2_check_acl);
 }
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 0bb7f003fd8..8ca058aed38 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
 
 #define JFFS2_ACL_NOT_CACHED ((void *)-1)
 
-extern int jffs2_permission(struct inode *, int, struct nameidata *);
+extern int jffs2_permission(struct inode *, int);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
 extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 4d84bdc8829..d3e5c33665d 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
-int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int jfs_permission(struct inode *inode, int mask)
 {
 	return generic_permission(inode, mask, jfs_check_acl);
 }
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 455fa429204..88475f10a38 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-int jfs_permission(struct inode *, int, struct nameidata *);
+int jfs_permission(struct inode *, int);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
 
diff --git a/fs/namei.c b/fs/namei.c
index 3b26a240ade..46af98ed136 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -185,6 +185,8 @@ int generic_permission(struct inode *inode, int mask,
 {
 	umode_t			mode = inode->i_mode;
 
+	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+
 	if (current->fsuid == inode->i_uid)
 		mode >>= 6;
 	else {
@@ -203,7 +205,7 @@ int generic_permission(struct inode *inode, int mask,
 	/*
 	 * If the DACs are ok we don't need any capability check.
 	 */
-	if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
+	if ((mask & ~mode) == 0)
 		return 0;
 
  check_capabilities:
@@ -228,7 +230,7 @@ int generic_permission(struct inode *inode, int mask,
 
 int permission(struct inode *inode, int mask, struct nameidata *nd)
 {
-	int retval, submask;
+	int retval;
 	struct vfsmount *mnt = NULL;
 
 	if (nd)
@@ -261,9 +263,17 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 	/* Ordinary permission routines do not understand MAY_APPEND. */
-	submask = mask & ~MAY_APPEND;
 	if (inode->i_op && inode->i_op->permission) {
-		retval = inode->i_op->permission(inode, submask, nd);
+		int extra = 0;
+		if (nd) {
+			if (nd->flags & LOOKUP_ACCESS)
+				extra |= MAY_ACCESS;
+			if (nd->flags & LOOKUP_CHDIR)
+				extra |= MAY_CHDIR;
+			if (nd->flags & LOOKUP_OPEN)
+				extra |= MAY_OPEN;
+		}
+		retval = inode->i_op->permission(inode, mask | extra);
 		if (!retval) {
 			/*
 			 * Exec permission on a regular file is denied if none
@@ -277,7 +287,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 				return -EACCES;
 		}
 	} else {
-		retval = generic_permission(inode, submask, NULL);
+		retval = generic_permission(inode, mask, NULL);
 	}
 	if (retval)
 		return retval;
@@ -286,7 +296,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	if (retval)
 		return retval;
 
-	return security_inode_permission(inode, mask, nd);
+	return security_inode_permission(inode,
+			mask & (MAY_READ|MAY_WRITE|MAY_EXEC), nd);
 }
 
 /**
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 28a238dab23..74f92b717f7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 		return status;
 	nfs_access_add_cache(inode, &cache);
 out:
-	if ((cache.mask & mask) == mask)
+	if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
 		return 0;
 	return -EACCES;
 }
@@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
 	return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
 }
 
-int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int nfs_permission(struct inode *inode, int mask)
 {
 	struct rpc_cred *cred;
 	int res = 0;
 
 	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
 
-	if (mask == 0)
+	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
 		goto out;
 	/* Is this sys_access() ? */
-	if (nd != NULL && (nd->flags & LOOKUP_ACCESS))
+	if (mask & MAY_ACCESS)
 		goto force_lookup;
 
 	switch (inode->i_mode & S_IFMT) {
@@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 		case S_IFREG:
 			/* NFSv4 has atomic_open... */
 			if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
-					&& nd != NULL
-					&& (nd->flags & LOOKUP_OPEN))
+					&& (mask & MAY_OPEN))
 				goto out;
 			break;
 		case S_IFDIR:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e8514e8b6ce..be2dd95d3a1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ bail:
 	return err;
 }
 
-int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
+int ocfs2_permission(struct inode *inode, int mask)
 {
 	int ret;
 
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 048ddcaf5c8..1e27b4d017e 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		  struct kstat *stat);
-int ocfs2_permission(struct inode *inode, int mask,
-		     struct nameidata *nd);
+int ocfs2_permission(struct inode *inode, int mask);
 
 int ocfs2_should_update_atime(struct inode *inode,
 			      struct vfsmount *vfsmnt);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 81bce6791bf..d82d800389f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1859,8 +1859,7 @@ static const struct file_operations proc_fd_operations = {
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
  */
-static int proc_fd_permission(struct inode *inode, int mask,
-				struct nameidata *nd)
+static int proc_fd_permission(struct inode *inode, int mask)
 {
 	int rv;
 
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index fa1ec2433e4..f9a8b892718 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -292,7 +292,7 @@ out:
 	return ret;
 }
 
-static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
+static int proc_sys_permission(struct inode *inode, int mask)
 {
 	/*
 	 * sysctl entries that are not writeable,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d7c4935c103..bb3cb5b7cdb 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
 	return error;
 }
 
-int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
+int reiserfs_permission(struct inode *inode, int mask)
 {
 	/*
 	 * We don't do permission checks on the internal objects.
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 2294783320c..e4f8d51a555 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file)
  * privileges, so we need our own check for this.
  */
 static int
-smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
+smb_file_permission(struct inode *inode, int mask)
 {
 	int mode = inode->i_mode;
 	int error = 0;
@@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
 
 	/* Look at user permissions */
 	mode >>= 6;
-	if ((mode & 7 & mask) != mask)
+	if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
 		error = -EACCES;
 	return error;
 }
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2bf287ef548..5fc61c824bb 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -589,8 +589,7 @@ xfs_check_acl(
 STATIC int
 xfs_vn_permission(
 	struct inode		*inode,
-	int			mask,
-	struct nameidata	*nd)
+	int			mask)
 {
 	return generic_permission(inode, mask, xfs_check_acl);
 }
-- 
cgit v1.2.3


From c82e42da8a6b2f3a85dc4d4278cb8238702f8f64 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 24 Jun 2008 16:50:12 +0200
Subject: [patch 1/5] vfs: truncate: dont check immutable twice

vfs_permission(MAY_WRITE) already checked for the inode being
immutable, so no need to repeat it.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Christoph Hellwig <hch@infradead.org>
---
 fs/open.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index bb98d2fe809..b2e4c93aed0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -256,7 +256,7 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 		goto mnt_drop_write_and_out;
 
 	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+	if (IS_APPEND(inode))
 		goto mnt_drop_write_and_out;
 
 	error = get_write_access(inode);
-- 
cgit v1.2.3


From 2f1936b87783a3a56c9441b27b9ba7a747f11e8e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 24 Jun 2008 16:50:14 +0200
Subject: [patch 3/5] vfs: change remove_suid() to file_remove_suid()

All calls to remove_suid() are made with a file pointer, because
(similarly to file_update_time) it is called when the file is written.

Clean up callers by passing in a file instead of a dentry.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c             | 2 +-
 fs/ntfs/file.c             | 2 +-
 fs/splice.c                | 4 ++--
 fs/xfs/linux-2.6/xfs_lrw.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 67ff2c6a8f6..2bada6bbc31 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
-	err = remove_suid(file->f_path.dentry);
+	err = file_remove_suid(file);
 	if (err)
 		goto out;
 
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3c5550cd11d..d020866d423 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
 		goto out;
 	if (!count)
 		goto out;
-	err = remove_suid(file->f_path.dentry);
+	err = file_remove_suid(file);
 	if (err)
 		goto out;
 	file_update_time(file);
diff --git a/fs/splice.c b/fs/splice.c
index 47dc1a445d1..b30311ba8af 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t ret;
 	int err;
 
-	err = remove_suid(out->f_path.dentry);
+	err = file_remove_suid(out);
 	if (unlikely(err))
 		return err;
 
@@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 	ssize_t ret;
 
 	inode_double_lock(inode, pipe->inode);
-	ret = remove_suid(out->f_path.dentry);
+	ret = file_remove_suid(out);
 	if (likely(!ret))
 		ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
 	inode_double_unlock(inode, pipe->inode);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5e3b57516ec..82333b3e118 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -711,7 +711,7 @@ start:
 	     !capable(CAP_FSETID)) {
 		error = xfs_write_clear_setuid(xip);
 		if (likely(!error))
-			error = -remove_suid(file->f_path.dentry);
+			error = -file_remove_suid(file);
 		if (unlikely(error)) {
 			goto out_unlock_internal;
 		}
-- 
cgit v1.2.3


From 7e79eedb3b22200cc8b774baea3a7bf28d766101 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 24 Jun 2008 16:50:15 +0200
Subject: [patch 4/5] vfs: reuse local variable in vfs_link()

Why not reuse "inode" which is assigned as

  struct inode *inode = old_dentry->d_inode;

in the beginning of vfs_link() ?

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 46af98ed136..3b67be7631d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2524,19 +2524,19 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 		return -EPERM;
 	if (!dir->i_op || !dir->i_op->link)
 		return -EPERM;
-	if (S_ISDIR(old_dentry->d_inode->i_mode))
+	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
 	error = security_inode_link(old_dentry, dir, new_dentry);
 	if (error)
 		return error;
 
-	mutex_lock(&old_dentry->d_inode->i_mutex);
+	mutex_lock(&inode->i_mutex);
 	DQUOT_INIT(dir);
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
-	mutex_unlock(&old_dentry->d_inode->i_mutex);
+	mutex_unlock(&inode->i_mutex);
 	if (!error)
-		fsnotify_link(dir, old_dentry->d_inode, new_dentry);
+		fsnotify_link(dir, inode, new_dentry);
 	return error;
 }
 
-- 
cgit v1.2.3


From db2e747b14991a4c6a5c98b0e5f552a193237c03 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 24 Jun 2008 16:50:16 +0200
Subject: [patch 5/5] vfs: remove mode parameter from vfs_symlink()

Remove the unused mode parameter from vfs_symlink and callers.

Thanks to Tetsuo Handa for noticing.

CC: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/ecryptfs/inode.c |  4 +---
 fs/namei.c          |  4 ++--
 fs/nfsd/vfs.c       | 10 ++--------
 3 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 32f4228efcd..f25caf2b088 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -465,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
 	int rc;
 	struct dentry *lower_dentry;
 	struct dentry *lower_dir_dentry;
-	umode_t mode;
 	char *encoded_symname;
 	int encoded_symlen;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -473,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	dget(lower_dentry);
 	lower_dir_dentry = lock_parent(lower_dentry);
-	mode = S_IALLUGO;
 	encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
 						  strlen(symname),
 						  &encoded_symname);
@@ -482,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
 		goto out_lock;
 	}
 	rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
-			 encoded_symname, mode);
+			 encoded_symname);
 	kfree(encoded_symname);
 	if (rc || !lower_dentry->d_inode)
 		goto out_lock;
diff --git a/fs/namei.c b/fs/namei.c
index 3b67be7631d..ae0e56fdb74 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2434,7 +2434,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
 	return do_unlinkat(AT_FDCWD, pathname);
 }
 
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
+int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 {
 	int error = may_create(dir, dentry, NULL);
 
@@ -2483,7 +2483,7 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
-	error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
+	error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0f4481e0502..ad1ad59e374 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1516,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	struct dentry	*dentry, *dnew;
 	__be32		err, cerr;
 	int		host_err;
-	umode_t		mode;
 
 	err = nfserr_noent;
 	if (!flen || !plen)
@@ -1535,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (IS_ERR(dnew))
 		goto out_nfserr;
 
-	mode = S_IALLUGO;
-	/* Only the MODE ATTRibute is even vaguely meaningful */
-	if (iap && (iap->ia_valid & ATTR_MODE))
-		mode = iap->ia_mode & S_IALLUGO;
-
 	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
 	if (host_err)
 		goto out_nfserr;
@@ -1551,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		else {
 			strncpy(path_alloced, path, plen);
 			path_alloced[plen] = 0;
-			host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+			host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
 			kfree(path_alloced);
 		}
 	} else
-		host_err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+		host_err = vfs_symlink(dentry->d_inode, dnew, path);
 
 	if (!host_err) {
 		if (EX_ISSYNC(fhp->fh_export))
-- 
cgit v1.2.3


From 8bb79224b87aab92071e94d46e70bd160d89bf34 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 Jul 2008 09:51:03 -0400
Subject: [PATCH] permission checks for chdir need special treatment only on
 the last step

... so we ought to pass MAY_CHDIR to vfs_permission() instead of having
it triggered on every step of preceding pathname resolution.  LOOKUP_CHDIR
is killed by that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 2 --
 fs/open.c  | 5 ++---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index ae0e56fdb74..6c76e1ee9c4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -268,8 +268,6 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 		if (nd) {
 			if (nd->flags & LOOKUP_ACCESS)
 				extra |= MAY_ACCESS;
-			if (nd->flags & LOOKUP_CHDIR)
-				extra |= MAY_CHDIR;
 			if (nd->flags & LOOKUP_OPEN)
 				extra |= MAY_OPEN;
 		}
diff --git a/fs/open.c b/fs/open.c
index b2e4c93aed0..8e02d42bfe4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -501,12 +501,11 @@ asmlinkage long sys_chdir(const char __user * filename)
 	struct nameidata nd;
 	int error;
 
-	error = __user_walk(filename,
-			    LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
+	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC);
+	error = vfs_permission(&nd, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
-- 
cgit v1.2.3


From 7f2da1e7d0330395e5e9e350b879b98a1ea495df Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 10 May 2008 20:44:54 -0400
Subject: [PATCH] kill altroot

long overdue...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c     | 89 ++--------------------------------------------------------
 fs/namespace.c |  8 +-----
 fs/open.c      |  3 +-
 3 files changed, 4 insertions(+), 96 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 6c76e1ee9c4..095818089ac 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -31,7 +31,6 @@
 #include <linux/file.h>
 #include <linux/fcntl.h>
 #include <linux/device_cgroup.h>
-#include <asm/namei.h>
 #include <asm/uaccess.h>
 
 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
@@ -562,27 +561,16 @@ out_unlock:
 	return result;
 }
 
-static int __emul_lookup_dentry(const char *, struct nameidata *);
-
 /* SMP-safe */
-static __always_inline int
+static __always_inline void
 walk_init_root(const char *name, struct nameidata *nd)
 {
 	struct fs_struct *fs = current->fs;
 
 	read_lock(&fs->lock);
-	if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
-		nd->path = fs->altroot;
-		path_get(&fs->altroot);
-		read_unlock(&fs->lock);
-		if (__emul_lookup_dentry(name,nd))
-			return 0;
-		read_lock(&fs->lock);
-	}
 	nd->path = fs->root;
 	path_get(&fs->root);
 	read_unlock(&fs->lock);
-	return 1;
 }
 
 /*
@@ -623,12 +611,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 
 	if (*link == '/') {
 		path_put(&nd->path);
-		if (!walk_init_root(link, nd))
-			/* weird __emul_prefix() stuff did it */
-			goto out;
+		walk_init_root(link, nd);
 	}
 	res = link_path_walk(link, nd);
-out:
 	if (nd->depth || res || nd->last_type!=LAST_NORM)
 		return res;
 	/*
@@ -1077,67 +1062,6 @@ static int path_walk(const char *name, struct nameidata *nd)
 	return link_path_walk(name, nd);
 }
 
-/* 
- * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
- * everything is done. Returns 0 and drops input nd, if lookup failed;
- */
-static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
-{
-	if (path_walk(name, nd))
-		return 0;		/* something went wrong... */
-
-	if (!nd->path.dentry->d_inode ||
-	    S_ISDIR(nd->path.dentry->d_inode->i_mode)) {
-		struct path old_path = nd->path;
-		struct qstr last = nd->last;
-		int last_type = nd->last_type;
-		struct fs_struct *fs = current->fs;
-
-		/*
-		 * NAME was not found in alternate root or it's a directory.
-		 * Try to find it in the normal root:
-		 */
-		nd->last_type = LAST_ROOT;
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
-		if (path_walk(name, nd) == 0) {
-			if (nd->path.dentry->d_inode) {
-				path_put(&old_path);
-				return 1;
-			}
-			path_put(&nd->path);
-		}
-		nd->path = old_path;
-		nd->last = last;
-		nd->last_type = last_type;
-	}
-	return 1;
-}
-
-void set_fs_altroot(void)
-{
-	char *emul = __emul_prefix();
-	struct nameidata nd;
-	struct path path = {}, old_path;
-	int err;
-	struct fs_struct *fs = current->fs;
-
-	if (!emul)
-		goto set_it;
-	err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
-	if (!err)
-		path = nd.path;
-set_it:
-	write_lock(&fs->lock);
-	old_path = fs->altroot;
-	fs->altroot = path;
-	write_unlock(&fs->lock);
-	if (old_path.dentry)
-		path_put(&old_path);
-}
-
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
 static int do_path_lookup(int dfd, const char *name,
 				unsigned int flags, struct nameidata *nd)
@@ -1153,14 +1077,6 @@ static int do_path_lookup(int dfd, const char *name,
 
 	if (*name=='/') {
 		read_lock(&fs->lock);
-		if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
-			nd->path = fs->altroot;
-			path_get(&fs->altroot);
-			read_unlock(&fs->lock);
-			if (__emul_lookup_dentry(name,nd))
-				goto out; /* found in altroot */
-			read_lock(&fs->lock);
-		}
 		nd->path = fs->root;
 		path_get(&fs->root);
 		read_unlock(&fs->lock);
@@ -1194,7 +1110,6 @@ static int do_path_lookup(int dfd, const char *name,
 	}
 
 	retval = path_walk(name, nd);
-out:
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index f30b11e2240..c4fcf48acef 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1972,7 +1972,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		struct fs_struct *fs)
 {
 	struct mnt_namespace *new_ns;
-	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
+	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
 	struct vfsmount *p, *q;
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
@@ -2015,10 +2015,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 				pwdmnt = p;
 				fs->pwd.mnt = mntget(q);
 			}
-			if (p == fs->altroot.mnt) {
-				altrootmnt = p;
-				fs->altroot.mnt = mntget(q);
-			}
 		}
 		p = next_mnt(p, mnt_ns->root);
 		q = next_mnt(q, new_ns->root);
@@ -2029,8 +2025,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		mntput(rootmnt);
 	if (pwdmnt)
 		mntput(pwdmnt);
-	if (altrootmnt)
-		mntput(altrootmnt);
 
 	return new_ns;
 }
diff --git a/fs/open.c b/fs/open.c
index 8e02d42bfe4..d3a2a00f52d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -548,7 +548,7 @@ asmlinkage long sys_chroot(const char __user * filename)
 	struct nameidata nd;
 	int error;
 
-	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
 	if (error)
 		goto out;
 
@@ -561,7 +561,6 @@ asmlinkage long sys_chroot(const char __user * filename)
 		goto dput_and_out;
 
 	set_fs_root(current->fs, &nd.path);
-	set_fs_altroot();
 	error = 0;
 dput_and_out:
 	path_put(&nd.path);
-- 
cgit v1.2.3


From a110343f0d6d41f68b7cf8c00b57a3172c67f816 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 Jul 2008 09:19:08 -0400
Subject: [PATCH] fix MAY_CHDIR/MAY_ACCESS/LOOKUP_ACCESS mess

* MAY_CHDIR is redundant - it's an equivalent of MAY_ACCESS
* MAY_ACCESS on fuse should affect only the last step of pathname resolution
* fchdir() and chroot() should pass MAY_ACCESS, for the same reason why
  chdir() needs that.
* now that we pass MAY_ACCESS explicitly in all cases, LOOKUP_ACCESS can be
  removed; it has no business being in nameidata.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/dir.c |  2 +-
 fs/namei.c    |  2 --
 fs/open.c     | 10 +++++-----
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 48a7934cb95..fd03330cade 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -962,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask)
 		   exist.  So if permissions are revoked this won't be
 		   noticed immediately, only after the attribute
 		   timeout has expired */
-	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
+	} else if (mask & MAY_ACCESS) {
 		err = fuse_access(inode, mask);
 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/namei.c b/fs/namei.c
index 095818089ac..33dcaf025c4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -265,8 +265,6 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	if (inode->i_op && inode->i_op->permission) {
 		int extra = 0;
 		if (nd) {
-			if (nd->flags & LOOKUP_ACCESS)
-				extra |= MAY_ACCESS;
 			if (nd->flags & LOOKUP_OPEN)
 				extra |= MAY_OPEN;
 		}
diff --git a/fs/open.c b/fs/open.c
index d3a2a00f52d..3317e1909b2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -457,11 +457,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 			old_cap = cap_set_effective(current->cap_permitted);
 	}
 
-	res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
+	res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
 	if (res)
 		goto out;
 
-	res = vfs_permission(&nd, mode);
+	res = vfs_permission(&nd, mode | MAY_ACCESS);
 	/* SuS v2 requires we report a read only fs too */
 	if(res || !(mode & S_IWOTH) ||
 	   special_file(nd.path.dentry->d_inode->i_mode))
@@ -505,7 +505,7 @@ asmlinkage long sys_chdir(const char __user * filename)
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC | MAY_CHDIR);
+	error = vfs_permission(&nd, MAY_EXEC | MAY_ACCESS);
 	if (error)
 		goto dput_and_out;
 
@@ -534,7 +534,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
 	if (!S_ISDIR(inode->i_mode))
 		goto out_putf;
 
-	error = file_permission(file, MAY_EXEC);
+	error = file_permission(file, MAY_EXEC | MAY_ACCESS);
 	if (!error)
 		set_fs_pwd(current->fs, &file->f_path);
 out_putf:
@@ -552,7 +552,7 @@ asmlinkage long sys_chroot(const char __user * filename)
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC);
+	error = vfs_permission(&nd, MAY_EXEC | MAY_ACCESS);
 	if (error)
 		goto dput_and_out;
 
-- 
cgit v1.2.3


From b77b0646ef4efe31a7449bb3d9360fd00f95433d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 Jul 2008 09:37:02 -0400
Subject: [PATCH] pass MAY_OPEN to vfs_permission() explicitly

... and get rid of the last "let's deduce mask from nameidata->flags"
bit.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c  |  4 ++--
 fs/namei.c | 13 ++++---------
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index b8792a13153..0ba5d355c5a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -118,7 +118,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
 		goto exit;
 
-	error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
+	error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
 
@@ -666,7 +666,7 @@ struct file *open_exec(const char *name)
 		struct inode *inode = nd.path.dentry->d_inode;
 		file = ERR_PTR(-EACCES);
 		if (S_ISREG(inode->i_mode)) {
-			int err = vfs_permission(&nd, MAY_EXEC);
+			int err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
 			file = ERR_PTR(err);
 			if (!err) {
 				file = nameidata_to_filp(&nd,
diff --git a/fs/namei.c b/fs/namei.c
index 33dcaf025c4..6b0e8e5e079 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -263,12 +263,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	if (inode->i_op && inode->i_op->permission) {
-		int extra = 0;
-		if (nd) {
-			if (nd->flags & LOOKUP_OPEN)
-				extra |= MAY_OPEN;
-		}
-		retval = inode->i_op->permission(inode, mask | extra);
+		retval = inode->i_op->permission(inode, mask);
 		if (!retval) {
 			/*
 			 * Exec permission on a regular file is denied if none
@@ -292,7 +287,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 		return retval;
 
 	return security_inode_permission(inode,
-			mask & (MAY_READ|MAY_WRITE|MAY_EXEC), nd);
+			mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
 }
 
 /**
@@ -492,7 +487,7 @@ static int exec_permission_lite(struct inode *inode,
 
 	return -EACCES;
 ok:
-	return security_inode_permission(inode, MAY_EXEC, nd);
+	return security_inode_permission(inode, MAY_EXEC);
 }
 
 /*
@@ -1692,7 +1687,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	int will_write;
 	int flag = open_to_namei_flags(open_flag);
 
-	acc_mode = ACC_MODE(flag);
+	acc_mode = MAY_OPEN | ACC_MODE(flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
 	if (flag & O_TRUNC)
-- 
cgit v1.2.3


From 672b16b2f66c149888bd876a4f92342112205fe1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 Jul 2008 09:45:01 -0400
Subject: [PATCH] more nameidata removal: exec_permission_lite() doesn't need
 it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 6b0e8e5e079..6d75430358a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -460,8 +460,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
  * short-cut DAC fails, then call permission() to do more
  * complete permission check.
  */
-static int exec_permission_lite(struct inode *inode,
-				       struct nameidata *nd)
+static int exec_permission_lite(struct inode *inode)
 {
 	umode_t	mode = inode->i_mode;
 
@@ -884,7 +883,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 		unsigned int c;
 
 		nd->flags |= LOOKUP_CONTINUE;
-		err = exec_permission_lite(inode, nd);
+		err = exec_permission_lite(inode);
 		if (err == -EAGAIN)
 			err = vfs_permission(nd, MAY_EXEC);
  		if (err)
-- 
cgit v1.2.3


From 88b387824fdaecb6ba0f471acf0aadf7d24739fd Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 21 Jul 2008 18:06:36 +0800
Subject: [PATCH] vfs: use kstrdup() and check failing allocation

- use kstrdup() instead of kmalloc() + memcpy()
- return NULL if allocating ->mnt_devname failed
- mnt_devname should be const

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index c4fcf48acef..26380f59953 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		int err;
 
 		err = mnt_alloc_id(mnt);
-		if (err) {
-			kmem_cache_free(mnt_cache, mnt);
-			return NULL;
+		if (err)
+			goto out_free_cache;
+
+		if (name) {
+			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
+			if (!mnt->mnt_devname)
+				goto out_free_id;
 		}
 
 		atomic_set(&mnt->mnt_count, 1);
@@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
 		atomic_set(&mnt->__mnt_writers, 0);
-		if (name) {
-			int size = strlen(name) + 1;
-			char *newname = kmalloc(size, GFP_KERNEL);
-			if (newname) {
-				memcpy(newname, name, size);
-				mnt->mnt_devname = newname;
-			}
-		}
 	}
 	return mnt;
+
+out_free_id:
+	mnt_free_id(mnt);
+out_free_cache:
+	kmem_cache_free(mnt_cache, mnt);
+	return NULL;
 }
 
 /*
-- 
cgit v1.2.3


From 9767d74957450da6365c363d69e3d02d605d7375 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Jul 2008 15:01:26 +0200
Subject: [patch 1/4] vfs: utimes: move owner check into inode_change_ok()

Add a new ia_valid flag: ATTR_TIMES_SET, to handle the
UTIMES_OMIT/UTIMES_NOW and UTIMES_NOW/UTIMES_OMIT cases.  In these
cases neither ATTR_MTIME_SET nor ATTR_ATIME_SET is in the flags, yet
the POSIX draft specifies that permission checking is performed the
same way as if one or both of the times was explicitly set to a
timestamp.

See the path "vfs: utimensat(): fix error checking for
{UTIME_NOW,UTIME_OMIT} case" by Michael Kerrisk for the patch
introducing this behavior.

This is a cleanup, as well as allowing filesystems (NFS/fuse/...) to
perform their own permission checking instead of the default.

CC: Ulrich Drepper <drepper@redhat.com>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c   |  2 +-
 fs/utimes.c | 17 ++++-------------
 2 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index 966b73e25f8..765fc75fab3 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
 	}
 
 	/* Check for setting the inode time. */
-	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
 		if (!is_owner_or_cap(inode))
 			goto error;
 	}
diff --git a/fs/utimes.c b/fs/utimes.c
index b6b664e7145..ecf8941ba34 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -101,7 +101,6 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 		     times[1].tv_nsec == UTIME_NOW)
 		times = NULL;
 
-	/* In most cases, the checks are done in inode_change_ok() */
 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 	if (times) {
 		error = -EPERM;
@@ -123,21 +122,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 			newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
 			newattrs.ia_valid |= ATTR_MTIME_SET;
 		}
-
 		/*
-		 * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT
-		 * cases, we need to make an extra check that is not done by
-		 * inode_change_ok().
+		 * Tell inode_change_ok(), that this is an explicit time
+		 * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
+		 * were used.
 		 */
-		if (((times[0].tv_nsec == UTIME_NOW &&
-			    times[1].tv_nsec == UTIME_OMIT)
-		     ||
-		     (times[0].tv_nsec == UTIME_OMIT &&
-			    times[1].tv_nsec == UTIME_NOW))
-		    && !is_owner_or_cap(inode))
-			goto mnt_drop_write_and_out;
+		newattrs.ia_valid |= ATTR_TIMES_SET;
 	} else {
-
 		/*
 		 * If times is NULL (or both times are UTIME_NOW),
 		 * then we need to check permissions, because
-- 
cgit v1.2.3


From e9b76fedc61235da80b6b7f81dfd67ec224dfb49 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Jul 2008 15:01:27 +0200
Subject: [patch 2/4] vfs: utimes cleanup

Untange the mess that is do_utimes().  Add kerneldoc comment to
do_utimes().

CC: Ulrich Drepper <drepper@redhat.com>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/utimes.c | 114 ++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 65 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/utimes.c b/fs/utimes.c
index ecf8941ba34..8e09dbdfd7f 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,54 +48,15 @@ static bool nsec_valid(long nsec)
 	return nsec >= 0 && nsec <= 999999999;
 }
 
-/* If times==NULL, set access and modification to current time,
- * must be owner or have write permission.
- * Else, update from *times, must be owner or super user.
- */
-long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
+static int utimes_common(struct path *path, struct timespec *times)
 {
 	int error;
-	struct nameidata nd;
-	struct dentry *dentry;
-	struct inode *inode;
 	struct iattr newattrs;
-	struct file *f = NULL;
-	struct vfsmount *mnt;
-
-	error = -EINVAL;
-	if (times && (!nsec_valid(times[0].tv_nsec) ||
-		      !nsec_valid(times[1].tv_nsec))) {
-		goto out;
-	}
-
-	if (flags & ~AT_SYMLINK_NOFOLLOW)
-		goto out;
-
-	if (filename == NULL && dfd != AT_FDCWD) {
-		error = -EINVAL;
-		if (flags & AT_SYMLINK_NOFOLLOW)
-			goto out;
-
-		error = -EBADF;
-		f = fget(dfd);
-		if (!f)
-			goto out;
-		dentry = f->f_path.dentry;
-		mnt = f->f_path.mnt;
-	} else {
-		error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
-		if (error)
-			goto out;
-
-		dentry = nd.path.dentry;
-		mnt = nd.path.mnt;
-	}
-
-	inode = dentry->d_inode;
+	struct inode *inode = path->dentry->d_inode;
 
-	error = mnt_want_write(mnt);
+	error = mnt_want_write(path->mnt);
 	if (error)
-		goto dput_and_out;
+		goto out;
 
 	if (times && times[0].tv_nsec == UTIME_NOW &&
 		     times[1].tv_nsec == UTIME_NOW)
@@ -145,15 +106,70 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 		}
 	}
 	mutex_lock(&inode->i_mutex);
-	error = notify_change(dentry, &newattrs);
+	error = notify_change(path->dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
+
 mnt_drop_write_and_out:
-	mnt_drop_write(mnt);
-dput_and_out:
-	if (f)
-		fput(f);
-	else
+	mnt_drop_write(path->mnt);
+out:
+	return error;
+}
+
+/*
+ * do_utimes - change times on filename or file descriptor
+ * @dfd: open file descriptor, -1 or AT_FDCWD
+ * @filename: path name or NULL
+ * @times: new times or NULL
+ * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
+ *
+ * If filename is NULL and dfd refers to an open file, then operate on
+ * the file.  Otherwise look up filename, possibly using dfd as a
+ * starting point.
+ *
+ * If times==NULL, set access and modification to current time,
+ * must be owner or have write permission.
+ * Else, update from *times, must be owner or super user.
+ */
+long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
+{
+	int error = -EINVAL;
+
+	if (times && (!nsec_valid(times[0].tv_nsec) ||
+		      !nsec_valid(times[1].tv_nsec))) {
+		goto out;
+	}
+
+	if (flags & ~AT_SYMLINK_NOFOLLOW)
+		goto out;
+
+	if (filename == NULL && dfd != AT_FDCWD) {
+		struct file *file;
+
+		if (flags & AT_SYMLINK_NOFOLLOW)
+			goto out;
+
+		file = fget(dfd);
+		error = -EBADF;
+		if (!file)
+			goto out;
+
+		error = utimes_common(&file->f_path, times);
+		fput(file);
+	} else {
+		struct nameidata nd;
+		int lookup_flags = 0;
+
+		if (!(flags & AT_SYMLINK_NOFOLLOW))
+			lookup_flags |= LOOKUP_FOLLOW;
+
+		error = __user_walk_fd(dfd, filename, lookup_flags, &nd);
+		if (error)
+			goto out;
+
+		error = utimes_common(&nd.path, times);
 		path_put(&nd.path);
+	}
+
 out:
 	return error;
 }
-- 
cgit v1.2.3


From b1da47e29e467f1ec36dc78d009bfb109fd533c7 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Jul 2008 15:01:28 +0200
Subject: [patch 3/4] fat: dont call notify_change

The FAT_IOCTL_SET_ATTRIBUTES ioctl() calls notify_change() to change
the file mode before changing the inode attributes.  Replace with
explicit calls to security_inode_setattr(), fat_setattr() and
fsnotify_change().

This is equivalent to the original.  The reason it is needed, is that
later in the series we move the immutable check into notify_change().
That would break the FAT_IOCTL_SET_ATTRIBUTES ioctl, as it needs to
perform the mode change regardless of the immutability of the file.

[Fix error if fat is built as a module.  Thanks to OGAWA Hirofumi for
noticing.]

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fat/file.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index c672df4036e..8707a8cfa02 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -15,6 +15,8 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
+#include <linux/fsnotify.h>
+#include <linux/security.h>
 
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		      unsigned int cmd, unsigned long arg)
@@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 
 		/* Equivalent to a chmod() */
 		ia.ia_valid = ATTR_MODE | ATTR_CTIME;
+		ia.ia_ctime = current_fs_time(inode->i_sb);
 		if (is_dir) {
 			ia.ia_mode = MSDOS_MKMODE(attr,
 				S_IRWXUGO & ~sbi->options.fs_dmask)
@@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 			}
 		}
 
+		/*
+		 * The security check is questionable...  We single
+		 * out the RO attribute for checking by the security
+		 * module, just because it maps to a file mode.
+		 */
+		err = security_inode_setattr(filp->f_path.dentry, &ia);
+		if (err)
+			goto up;
+
 		/* This MUST be done before doing anything irreversible... */
-		err = notify_change(filp->f_path.dentry, &ia);
+		err = fat_setattr(filp->f_path.dentry, &ia);
 		if (err)
 			goto up;
 
+		fsnotify_change(filp->f_path.dentry, ia.ia_valid);
 		if (sbi->options.sys_immutable) {
 			if (attr & ATTR_SYS)
 				inode->i_flags |= S_IMMUTABLE;
-- 
cgit v1.2.3


From beb29e058c35ab69e96e455a12ccf7505f6de425 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Jul 2008 15:01:29 +0200
Subject: [patch 4/4] vfs: immutable inode checking cleanup

Move the immutable and append-only checks from chmod, chown and utimes
into notify_change().  Checks for immutable and append-only files are
always performed by the VFS and not by the filesystem (see
permission() and may_...() in namei.c), so these belong in
notify_change(), and not in inode_change_ok().

This should be completely equivalent.

CC: Ulrich Drepper <drepper@redhat.com>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c   |  5 +++++
 fs/open.c   | 24 ++----------------------
 fs/utimes.c |  4 ----
 3 files changed, 7 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index 765fc75fab3..26c71ba1eed 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -108,6 +108,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	struct timespec now;
 	unsigned int ia_valid = attr->ia_valid;
 
+	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
+		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+			return -EPERM;
+	}
+
 	now = current_fs_time(inode->i_sb);
 
 	attr->ia_ctime = now;
diff --git a/fs/open.c b/fs/open.c
index 3317e1909b2..3b3c43674be 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -588,9 +588,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	err = mnt_want_write(file->f_path.mnt);
 	if (err)
 		goto out_putf;
-	err = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out_drop_write;
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
@@ -598,8 +595,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	err = notify_change(dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
-
-out_drop_write:
 	mnt_drop_write(file->f_path.mnt);
 out_putf:
 	fput(file);
@@ -623,11 +618,6 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto dput_and_out;
-
-	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out_drop_write;
-
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
 		mode = inode->i_mode;
@@ -635,8 +625,6 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 	error = notify_change(nd.path.dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
-
-out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 dput_and_out:
 	path_put(&nd.path);
@@ -651,18 +639,10 @@ asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
 
 static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 {
-	struct inode * inode;
+	struct inode *inode = dentry->d_inode;
 	int error;
 	struct iattr newattrs;
 
-	error = -ENOENT;
-	if (!(inode = dentry->d_inode)) {
-		printk(KERN_ERR "chown_common: NULL inode\n");
-		goto out;
-	}
-	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		goto out;
 	newattrs.ia_valid =  ATTR_CTIME;
 	if (user != (uid_t) -1) {
 		newattrs.ia_valid |= ATTR_UID;
@@ -678,7 +658,7 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 	mutex_lock(&inode->i_mutex);
 	error = notify_change(dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
-out:
+
 	return error;
 }
 
diff --git a/fs/utimes.c b/fs/utimes.c
index 8e09dbdfd7f..dad679d3a15 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -64,10 +64,6 @@ static int utimes_common(struct path *path, struct timespec *times)
 
 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 	if (times) {
-		error = -EPERM;
-                if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-			goto mnt_drop_write_and_out;
-
 		if (times[0].tv_nsec == UTIME_OMIT)
 			newattrs.ia_valid &= ~ATTR_ATIME;
 		else if (times[0].tv_nsec != UTIME_NOW) {
-- 
cgit v1.2.3


From e56b6a5dda1a36ffaa532df6f975ea324298fa4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 19 May 2008 07:53:34 +0200
Subject: Re: [PATCH 3/6] vfs: open_exec cleanup

On Mon, May 19, 2008 at 12:01:49AM +0200, Marcin Slusarz wrote:
> open_exec is needlessly indented, calls ERR_PTR with 0 argument
> (which is not valid errno) and jumps into middle of function
> just to return value.
> So clean it up a bit.

Still looks rather messy.  See below for a better version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c | 58 ++++++++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 0ba5d355c5a..346e3f69c6e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -656,38 +656,40 @@ EXPORT_SYMBOL(setup_arg_pages);
 struct file *open_exec(const char *name)
 {
 	struct nameidata nd;
-	int err;
 	struct file *file;
+	int err;
 
-	err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
-	file = ERR_PTR(err);
-
-	if (!err) {
-		struct inode *inode = nd.path.dentry->d_inode;
-		file = ERR_PTR(-EACCES);
-		if (S_ISREG(inode->i_mode)) {
-			int err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
-			file = ERR_PTR(err);
-			if (!err) {
-				file = nameidata_to_filp(&nd,
-							O_RDONLY|O_LARGEFILE);
-				if (!IS_ERR(file)) {
-					err = deny_write_access(file);
-					if (err) {
-						fput(file);
-						file = ERR_PTR(err);
-					}
-				}
-out:
-				return file;
-			}
-		}
-		release_open_intent(&nd);
-		path_put(&nd.path);
+	err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
+				FMODE_READ|FMODE_EXEC);
+	if (err)
+		goto out;
+
+	err = -EACCES;
+	if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
+		goto out_path_put;
+
+	err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
+	if (err)
+		goto out_path_put;
+
+	file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
+	if (IS_ERR(file))
+		return file;
+
+	err = deny_write_access(file);
+	if (err) {
+		fput(file);
+		goto out;
 	}
-	goto out;
-}
 
+	return file;
+
+ out_path_put:
+	release_open_intent(&nd);
+	path_put(&nd.path);
+ out:
+	return ERR_PTR(err);
+}
 EXPORT_SYMBOL(open_exec);
 
 int kernel_read(struct file *file, unsigned long offset,
-- 
cgit v1.2.3


From 30524472c2f728c20d6bf35191042a5d455c0a64 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 22 Jul 2008 00:02:33 -0400
Subject: [PATCH] take noexec checks to very few callers that care

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c  |  7 +++++++
 fs/namei.c |  9 ---------
 fs/open.c  | 10 ++++++++++
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 346e3f69c6e..eca58c29ede 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -118,6 +118,10 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
 		goto exit;
 
+	error = -EACCES;
+	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+		goto exit;
+
 	error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
@@ -668,6 +672,9 @@ struct file *open_exec(const char *name)
 	if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
 		goto out_path_put;
 
+	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+		goto out_path_put;
+
 	err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
 	if (err)
 		goto out_path_put;
diff --git a/fs/namei.c b/fs/namei.c
index 6d75430358a..396cb3e5c36 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -252,15 +252,6 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 			return -EACCES;
 	}
 
-	if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
-		/*
-		 * MAY_EXEC on regular files is denied if the fs is mounted
-		 * with the "noexec" flag.
-		 */
-		if (mnt && (mnt->mnt_flags & MNT_NOEXEC))
-			return -EACCES;
-	}
-
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	if (inode->i_op && inode->i_op->permission) {
 		retval = inode->i_op->permission(inode, mask);
diff --git a/fs/open.c b/fs/open.c
index 3b3c43674be..d5e421ad0cf 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -461,6 +461,16 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 	if (res)
 		goto out;
 
+	if ((mode & MAY_EXEC) && S_ISREG(nd.path.dentry->d_inode->i_mode)) {
+		/*
+		 * MAY_EXEC on regular files is denied if the fs is mounted
+		 * with the "noexec" flag.
+		 */
+		res = -EACCES;
+		if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+			goto out_path_release;
+	}
+
 	res = vfs_permission(&nd, mode | MAY_ACCESS);
 	/* SuS v2 requires we report a read only fs too */
 	if(res || !(mode & S_IWOTH) ||
-- 
cgit v1.2.3


From f419a2e3b64def707e1384ee38abb77f99af5f6d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 22 Jul 2008 00:07:17 -0400
Subject: [PATCH] kill nameidata passing to permission(), rename to
 inode_permission()

Incidentally, the name that gives hundreds of false positives on grep
is not a good idea...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/inode.c |  2 +-
 fs/namei.c          | 22 +++++++++-------------
 fs/nfsd/nfsfh.c     |  2 +-
 fs/nfsd/vfs.c       |  4 ++--
 fs/utimes.c         |  2 +-
 fs/xattr.c          |  2 +-
 6 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f25caf2b088..89209f00f9c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -830,7 +830,7 @@ out:
 static int
 ecryptfs_permission(struct inode *inode, int mask)
 {
-	return permission(ecryptfs_inode_to_lower(inode), mask, NULL);
+	return inode_permission(ecryptfs_inode_to_lower(inode), mask);
 }
 
 /**
diff --git a/fs/namei.c b/fs/namei.c
index 396cb3e5c36..5029b93ebbd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -227,13 +227,9 @@ int generic_permission(struct inode *inode, int mask,
 	return -EACCES;
 }
 
-int permission(struct inode *inode, int mask, struct nameidata *nd)
+int inode_permission(struct inode *inode, int mask)
 {
 	int retval;
-	struct vfsmount *mnt = NULL;
-
-	if (nd)
-		mnt = nd->path.mnt;
 
 	if (mask & MAY_WRITE) {
 		umode_t mode = inode->i_mode;
@@ -293,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
  */
 int vfs_permission(struct nameidata *nd, int mask)
 {
-	return permission(nd->path.dentry->d_inode, mask, nd);
+	return inode_permission(nd->path.dentry->d_inode, mask);
 }
 
 /**
@@ -310,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask)
  */
 int file_permission(struct file *file, int mask)
 {
-	return permission(file->f_path.dentry->d_inode, mask, NULL);
+	return inode_permission(file->f_path.dentry->d_inode, mask);
 }
 
 /*
@@ -1262,7 +1258,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
 {
 	int err;
 
-	err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd);
+	err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
 	if (err)
 		return ERR_PTR(err);
 	return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1310,7 +1306,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 	if (err)
 		return ERR_PTR(err);
 
-	err = permission(base->d_inode, MAY_EXEC, NULL);
+	err = inode_permission(base->d_inode, MAY_EXEC);
 	if (err)
 		return ERR_PTR(err);
 	return __lookup_hash(&this, base, NULL);
@@ -1400,7 +1396,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 	BUG_ON(victim->d_parent->d_inode != dir);
 	audit_inode_child(victim->d_name.name, victim, dir);
 
-	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
+	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
 	if (error)
 		return error;
 	if (IS_APPEND(dir))
@@ -1437,7 +1433,7 @@ static inline int may_create(struct inode *dir, struct dentry *child,
 		return -EEXIST;
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
-	return permission(dir,MAY_WRITE | MAY_EXEC, nd);
+	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 }
 
 /* 
@@ -2543,7 +2539,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 	 * we'll need to flip '..'.
 	 */
 	if (new_dir != old_dir) {
-		error = permission(old_dentry->d_inode, MAY_WRITE, NULL);
+		error = inode_permission(old_dentry->d_inode, MAY_WRITE);
 		if (error)
 			return error;
 	}
@@ -2897,7 +2893,7 @@ EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
 EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(permission);
+EXPORT_SYMBOL(inode_permission);
 EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
 EXPORT_SYMBOL(unlock_rename);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f45451eb1e3..ea37c96f044 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
 		/* make sure parents give x permission to user */
 		int err;
 		parent = dget_parent(tdentry);
-		err = permission(parent->d_inode, MAY_EXEC, NULL);
+		err = inode_permission(parent->d_inode, MAY_EXEC);
 		if (err < 0) {
 			dput(parent);
 			break;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ad1ad59e374..18060bed526 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1953,12 +1953,12 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 		return 0;
 
 	/* This assumes  NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
-	err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+	err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
 
 	/* Allow read access to binaries even when mode 111 */
 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
 	    acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
-		err = permission(inode, MAY_EXEC, NULL);
+		err = inode_permission(inode, MAY_EXEC);
 
 	return err? nfserrno(err) : 0;
 }
diff --git a/fs/utimes.c b/fs/utimes.c
index dad679d3a15..dc28b782625 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -96,7 +96,7 @@ static int utimes_common(struct path *path, struct timespec *times)
 			goto mnt_drop_write_and_out;
 
 		if (!is_owner_or_cap(inode)) {
-			error = permission(inode, MAY_WRITE, NULL);
+			error = inode_permission(inode, MAY_WRITE);
 			if (error)
 				goto mnt_drop_write_and_out;
 		}
diff --git a/fs/xattr.c b/fs/xattr.c
index 4706a8b1f49..b96222e05ba 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
 			return -EPERM;
 	}
 
-	return permission(inode, mask, NULL);
+	return inode_permission(inode, mask);
 }
 
 int
-- 
cgit v1.2.3


From 256984a83880ff7ac78055cb87baea48137f0b77 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 22 Jul 2008 08:09:30 -0400
Subject: [PATCH] preparation to __user_walk_fd cleanup

Almost all users __user_walk_fd() and friends care only about struct path.
Get rid of the few that do not.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inotify_user.c |  2 +-
 fs/open.c         | 18 ++++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index fe79c25d95d..9b99ebf2888 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -365,7 +365,7 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = vfs_permission(nd, MAY_READ);
+	error = inode_permission(nd->path.dentry->d_inode, MAY_READ);
 	if (error)
 		path_put(&nd->path);
 	return error;
diff --git a/fs/open.c b/fs/open.c
index d5e421ad0cf..e94266700ed 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -251,7 +251,7 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 	if (error)
 		goto dput_and_out;
 
-	error = vfs_permission(&nd, MAY_WRITE);
+	error = inode_permission(inode, MAY_WRITE);
 	if (error)
 		goto mnt_drop_write_and_out;
 
@@ -426,6 +426,7 @@ out:
 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 {
 	struct nameidata nd;
+	struct inode *inode;
 	int old_fsuid, old_fsgid;
 	kernel_cap_t uninitialized_var(old_cap);  /* !SECURE_NO_SETUID_FIXUP */
 	int res;
@@ -461,7 +462,9 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 	if (res)
 		goto out;
 
-	if ((mode & MAY_EXEC) && S_ISREG(nd.path.dentry->d_inode->i_mode)) {
+	inode = nd.path.dentry->d_inode;
+
+	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		/*
 		 * MAY_EXEC on regular files is denied if the fs is mounted
 		 * with the "noexec" flag.
@@ -471,10 +474,9 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 			goto out_path_release;
 	}
 
-	res = vfs_permission(&nd, mode | MAY_ACCESS);
+	res = inode_permission(inode, mode | MAY_ACCESS);
 	/* SuS v2 requires we report a read only fs too */
-	if(res || !(mode & S_IWOTH) ||
-	   special_file(nd.path.dentry->d_inode->i_mode))
+	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
 		goto out_path_release;
 	/*
 	 * This is a rare case where using __mnt_is_readonly()
@@ -515,7 +517,7 @@ asmlinkage long sys_chdir(const char __user * filename)
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
 	if (error)
 		goto dput_and_out;
 
@@ -544,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
 	if (!S_ISDIR(inode->i_mode))
 		goto out_putf;
 
-	error = file_permission(file, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
 	if (!error)
 		set_fs_pwd(current->fs, &file->f_path);
 out_putf:
@@ -562,7 +564,7 @@ asmlinkage long sys_chroot(const char __user * filename)
 	if (error)
 		goto out;
 
-	error = vfs_permission(&nd, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
 	if (error)
 		goto dput_and_out;
 
-- 
cgit v1.2.3


From 2d8f30380ab8c706f4e0a8f1aaa22b5886e9ac8a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 22 Jul 2008 09:59:21 -0400
Subject: [PATCH] sanitize __user_walk_fd() et.al.

* do not pass nameidata; struct path is all the callers want.
* switch to new helpers:
	user_path_at(dfd, pathname, flags, &path)
	user_path(pathname, &path)
	user_lpath(pathname, &path)
	user_path_dir(pathname, &path)  (fail if not a directory)
  The last 3 are trivial macro wrappers for the first one.
* remove nameidata in callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coda/pioctl.c             |  14 ++---
 fs/compat.c                  |  20 +++----
 fs/inotify_user.c            |  22 ++++----
 fs/namei.c                   |  36 ++++++-------
 fs/namespace.c               |  74 +++++++++++++-------------
 fs/open.c                    | 124 +++++++++++++++++++++----------------------
 fs/stat.c                    |  32 +++++------
 fs/utimes.c                  |   8 +--
 fs/xattr.c                   |  96 ++++++++++++++++-----------------
 fs/xfs/linux-2.6/xfs_ioctl.c |  14 +++--
 10 files changed, 218 insertions(+), 222 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c38a98974fb..c51365422aa 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -49,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask)
 static int coda_pioctl(struct inode * inode, struct file * filp, 
                        unsigned int cmd, unsigned long user_data)
 {
-	struct nameidata nd;
+	struct path path;
         int error;
 	struct PioctlData data;
         struct inode *target_inode = NULL;
@@ -64,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
          * Look up the pathname. Note that the pathname is in 
          * user memory, and namei takes care of this
          */
-        if ( data.follow ) {
-                error = user_path_walk(data.path, &nd);
+        if (data.follow) {
+                error = user_path(data.path, &path);
 	} else {
-	        error = user_path_walk_link(data.path, &nd);
+	        error = user_lpath(data.path, &path);
 	}
 		
 	if ( error ) {
 		return error;
         } else {
-		target_inode = nd.path.dentry->d_inode;
+		target_inode = path.dentry->d_inode;
 	}
 	
 	/* return if it is not a Coda inode */
 	if ( target_inode->i_sb != inode->i_sb ) {
-		path_put(&nd.path);
+		path_put(&path);
 	        return  -EINVAL;
 	}
 
@@ -87,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
 
 	error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
 
-	path_put(&nd.path);
+	path_put(&path);
         return error;
 }
 
diff --git a/fs/compat.c b/fs/compat.c
index 106eba28ec5..c9d1472e65c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
  * The following statfs calls are copies of code from fs/open.c and
  * should be checked against those from time to time
  */
-asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf)
+asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.path.dentry, &tmp);
+		error = vfs_statfs(path.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
-		path_put(&nd.path);
+		path_put(&path);
 	}
 	return error;
 }
@@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 	return 0;
 }
 
-asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf)
+asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
 	if (sz != sizeof(*buf))
 		return -EINVAL;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(nd.path.dentry, &tmp);
+		error = vfs_statfs(path.dentry, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
-		path_put(&nd.path);
+		path_put(&path);
 	}
 	return error;
 }
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 9b99ebf2888..60249429a25 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
 }
 
 /*
- * find_inode - resolve a user-given path to a specific inode and return a nd
+ * find_inode - resolve a user-given path to a specific inode
  */
-static int find_inode(const char __user *dirname, struct nameidata *nd,
+static int find_inode(const char __user *dirname, struct path *path,
 		      unsigned flags)
 {
 	int error;
 
-	error = __user_walk(dirname, flags, nd);
+	error = user_path_at(AT_FDCWD, dirname, flags, path);
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = inode_permission(nd->path.dentry->d_inode, MAY_READ);
+	error = inode_permission(path->dentry->d_inode, MAY_READ);
 	if (error)
-		path_put(&nd->path);
+		path_put(path);
 	return error;
 }
 
@@ -650,11 +650,11 @@ asmlinkage long sys_inotify_init(void)
 	return sys_inotify_init1(0);
 }
 
-asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
 {
 	struct inode *inode;
 	struct inotify_device *dev;
-	struct nameidata nd;
+	struct path path;
 	struct file *filp;
 	int ret, fput_needed;
 	unsigned flags = 0;
@@ -674,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = find_inode(path, &nd, flags);
+	ret = find_inode(pathname, &path, flags);
 	if (unlikely(ret))
 		goto fput_and_out;
 
-	/* inode held in place by reference to nd; dev by fget on fd */
-	inode = nd.path.dentry->d_inode;
+	/* inode held in place by reference to path; dev by fget on fd */
+	inode = path.dentry->d_inode;
 	dev = filp->private_data;
 
 	mutex_lock(&dev->up_mutex);
@@ -688,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 		ret = create_watch(dev, inode, mask);
 	mutex_unlock(&dev->up_mutex);
 
-	path_put(&nd.path);
+	path_put(&path);
 fput_and_out:
 	fput_light(filp, fput_needed);
 	return ret;
diff --git a/fs/namei.c b/fs/namei.c
index 5029b93ebbd..edb5e973f9b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1334,24 +1334,24 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
 	return __lookup_hash(&this, base, NULL);
 }
 
-int __user_walk_fd(int dfd, const char __user *name, unsigned flags,
-			    struct nameidata *nd)
+int user_path_at(int dfd, const char __user *name, unsigned flags,
+		 struct path *path)
 {
+	struct nameidata nd;
 	char *tmp = getname(name);
 	int err = PTR_ERR(tmp);
-
 	if (!IS_ERR(tmp)) {
-		err = do_path_lookup(dfd, tmp, flags, nd);
+
+		BUG_ON(flags & LOOKUP_PARENT);
+
+		err = do_path_lookup(dfd, tmp, flags, &nd);
 		putname(tmp);
+		if (!err)
+			*path = nd.path;
 	}
 	return err;
 }
 
-int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
-{
-	return __user_walk_fd(AT_FDCWD, name, flags, nd);
-}
-
 /*
  * It's inline, so penalty for filesystems that don't use sticky bit is
  * minimal.
@@ -2446,7 +2446,8 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 			   int flags)
 {
 	struct dentry *new_dentry;
-	struct nameidata nd, old_nd;
+	struct nameidata nd;
+	struct path old_path;
 	int error;
 	char * to;
 
@@ -2457,16 +2458,16 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	if (IS_ERR(to))
 		return PTR_ERR(to);
 
-	error = __user_walk_fd(olddfd, oldname,
-			       flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
-			       &old_nd);
+	error = user_path_at(olddfd, oldname,
+			     flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
+			     &old_path);
 	if (error)
 		goto exit;
 	error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
 	if (error)
 		goto out;
 	error = -EXDEV;
-	if (old_nd.path.mnt != nd.path.mnt)
+	if (old_path.mnt != nd.path.mnt)
 		goto out_release;
 	new_dentry = lookup_create(&nd, 0);
 	error = PTR_ERR(new_dentry);
@@ -2475,7 +2476,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
-	error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
+	error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(new_dentry);
@@ -2484,7 +2485,7 @@ out_unlock:
 out_release:
 	path_put(&nd.path);
 out:
-	path_put(&old_nd.path);
+	path_put(&old_path);
 exit:
 	putname(to);
 
@@ -2877,8 +2878,7 @@ const struct inode_operations page_symlink_inode_operations = {
 	.put_link	= page_put_link,
 };
 
-EXPORT_SYMBOL(__user_walk);
-EXPORT_SYMBOL(__user_walk_fd);
+EXPORT_SYMBOL(user_path_at);
 EXPORT_SYMBOL(follow_down);
 EXPORT_SYMBOL(follow_up);
 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
diff --git a/fs/namespace.c b/fs/namespace.c
index 26380f59953..411728c0c8b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1130,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags)
 
 asmlinkage long sys_umount(char __user * name, int flags)
 {
-	struct nameidata nd;
+	struct path path;
 	int retval;
 
-	retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
+	retval = user_path(name, &path);
 	if (retval)
 		goto out;
 	retval = -EINVAL;
-	if (nd.path.dentry != nd.path.mnt->mnt_root)
+	if (path.dentry != path.mnt->mnt_root)
 		goto dput_and_out;
-	if (!check_mnt(nd.path.mnt))
+	if (!check_mnt(path.mnt))
 		goto dput_and_out;
 
 	retval = -EPERM;
 	if (!capable(CAP_SYS_ADMIN))
 		goto dput_and_out;
 
-	retval = do_umount(nd.path.mnt, flags);
+	retval = do_umount(path.mnt, flags);
 dput_and_out:
 	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
-	dput(nd.path.dentry);
-	mntput_no_expire(nd.path.mnt);
+	dput(path.dentry);
+	mntput_no_expire(path.mnt);
 out:
 	return retval;
 }
@@ -2179,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 			       const char __user * put_old)
 {
 	struct vfsmount *tmp;
-	struct nameidata new_nd, old_nd;
-	struct path parent_path, root_parent, root;
+	struct path new, old, parent_path, root_parent, root;
 	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
-			    &new_nd);
+	error = user_path_dir(new_root, &new);
 	if (error)
 		goto out0;
 	error = -EINVAL;
-	if (!check_mnt(new_nd.path.mnt))
+	if (!check_mnt(new.mnt))
 		goto out1;
 
-	error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd);
+	error = user_path_dir(put_old, &old);
 	if (error)
 		goto out1;
 
-	error = security_sb_pivotroot(&old_nd.path, &new_nd.path);
+	error = security_sb_pivotroot(&old, &new);
 	if (error) {
-		path_put(&old_nd.path);
+		path_put(&old);
 		goto out1;
 	}
 
@@ -2209,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
 	path_get(&current->fs->root);
 	read_unlock(&current->fs->lock);
 	down_write(&namespace_sem);
-	mutex_lock(&old_nd.path.dentry->d_inode->i_mutex);
+	mutex_lock(&old.dentry->d_inode->i_mutex);
 	error = -EINVAL;
-	if (IS_MNT_SHARED(old_nd.path.mnt) ||
-		IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) ||
+	if (IS_MNT_SHARED(old.mnt) ||
+		IS_MNT_SHARED(new.mnt->mnt_parent) ||
 		IS_MNT_SHARED(root.mnt->mnt_parent))
 		goto out2;
 	if (!check_mnt(root.mnt))
 		goto out2;
 	error = -ENOENT;
-	if (IS_DEADDIR(new_nd.path.dentry->d_inode))
+	if (IS_DEADDIR(new.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry))
+	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
 		goto out2;
-	if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry))
+	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
 		goto out2;
 	error = -EBUSY;
-	if (new_nd.path.mnt == root.mnt ||
-	    old_nd.path.mnt == root.mnt)
+	if (new.mnt == root.mnt ||
+	    old.mnt == root.mnt)
 		goto out2; /* loop, on the same file system  */
 	error = -EINVAL;
 	if (root.mnt->mnt_root != root.dentry)
 		goto out2; /* not a mountpoint */
 	if (root.mnt->mnt_parent == root.mnt)
 		goto out2; /* not attached */
-	if (new_nd.path.mnt->mnt_root != new_nd.path.dentry)
+	if (new.mnt->mnt_root != new.dentry)
 		goto out2; /* not a mountpoint */
-	if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt)
+	if (new.mnt->mnt_parent == new.mnt)
 		goto out2; /* not attached */
 	/* make sure we can reach put_old from new_root */
-	tmp = old_nd.path.mnt;
+	tmp = old.mnt;
 	spin_lock(&vfsmount_lock);
-	if (tmp != new_nd.path.mnt) {
+	if (tmp != new.mnt) {
 		for (;;) {
 			if (tmp->mnt_parent == tmp)
 				goto out3; /* already mounted on put_old */
-			if (tmp->mnt_parent == new_nd.path.mnt)
+			if (tmp->mnt_parent == new.mnt)
 				break;
 			tmp = tmp->mnt_parent;
 		}
-		if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry))
+		if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
 			goto out3;
-	} else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry))
+	} else if (!is_subdir(old.dentry, new.dentry))
 		goto out3;
-	detach_mnt(new_nd.path.mnt, &parent_path);
+	detach_mnt(new.mnt, &parent_path);
 	detach_mnt(root.mnt, &root_parent);
 	/* mount old root on put_old */
-	attach_mnt(root.mnt, &old_nd.path);
+	attach_mnt(root.mnt, &old);
 	/* mount new_root on / */
-	attach_mnt(new_nd.path.mnt, &root_parent);
+	attach_mnt(new.mnt, &root_parent);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	spin_unlock(&vfsmount_lock);
-	chroot_fs_refs(&root, &new_nd.path);
-	security_sb_post_pivotroot(&root, &new_nd.path);
+	chroot_fs_refs(&root, &new);
+	security_sb_post_pivotroot(&root, &new);
 	error = 0;
 	path_put(&root_parent);
 	path_put(&parent_path);
 out2:
-	mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex);
+	mutex_unlock(&old.dentry->d_inode->i_mutex);
 	up_write(&namespace_sem);
 	path_put(&root);
-	path_put(&old_nd.path);
+	path_put(&old);
 out1:
-	path_put(&new_nd.path);
+	path_put(&new);
 out0:
 	return error;
 out3:
diff --git a/fs/open.c b/fs/open.c
index e94266700ed..3fe1a6857c7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -122,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
 	return 0;
 }
 
-asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf)
+asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(nd.path.dentry, &tmp);
+		error = vfs_statfs_native(path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
-		path_put(&nd.path);
+		path_put(&path);
 	}
 	return error;
 }
 
 
-asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf)
+asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf)
 {
-	struct nameidata nd;
+	struct path path;
 	long error;
 
 	if (sz != sizeof(*buf))
 		return -EINVAL;
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(nd.path.dentry, &tmp);
+		error = vfs_statfs64(path.dentry, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
-		path_put(&nd.path);
+		path_put(&path);
 	}
 	return error;
 }
@@ -223,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	return err;
 }
 
-static long do_sys_truncate(const char __user * path, loff_t length)
+static long do_sys_truncate(const char __user *pathname, loff_t length)
 {
-	struct nameidata nd;
-	struct inode * inode;
+	struct path path;
+	struct inode *inode;
 	int error;
 
 	error = -EINVAL;
 	if (length < 0)	/* sorry, but loff_t says... */
 		goto out;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (error)
 		goto out;
-	inode = nd.path.dentry->d_inode;
+	inode = path.dentry->d_inode;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
 	error = -EISDIR;
@@ -247,7 +247,7 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 	if (!S_ISREG(inode->i_mode))
 		goto dput_and_out;
 
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (error)
 		goto dput_and_out;
 
@@ -274,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error) {
 		DQUOT_INIT(inode);
-		error = do_truncate(nd.path.dentry, length, 0, NULL);
+		error = do_truncate(path.dentry, length, 0, NULL);
 	}
 
 put_write_and_out:
 	put_write_access(inode);
 mnt_drop_write_and_out:
-	mnt_drop_write(nd.path.mnt);
+	mnt_drop_write(path.mnt);
 dput_and_out:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
@@ -425,7 +425,7 @@ out:
  */
 asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 {
-	struct nameidata nd;
+	struct path path;
 	struct inode *inode;
 	int old_fsuid, old_fsgid;
 	kernel_cap_t uninitialized_var(old_cap);  /* !SECURE_NO_SETUID_FIXUP */
@@ -449,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 		 * FIXME: There is a race here against sys_capset.  The
 		 * capabilities can change yet we will restore the old
 		 * value below.  We should hold task_capabilities_lock,
-		 * but we cannot because user_path_walk can sleep.
+		 * but we cannot because user_path_at can sleep.
 		 */
 #endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
 		if (current->uid)
@@ -458,11 +458,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 			old_cap = cap_set_effective(current->cap_permitted);
 	}
 
-	res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
 	if (res)
 		goto out;
 
-	inode = nd.path.dentry->d_inode;
+	inode = path.dentry->d_inode;
 
 	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 		/*
@@ -470,7 +470,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 		 * with the "noexec" flag.
 		 */
 		res = -EACCES;
-		if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
+		if (path.mnt->mnt_flags & MNT_NOEXEC)
 			goto out_path_release;
 	}
 
@@ -488,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
 	 * inherently racy and know that the fs may change
 	 * state before we even see this result.
 	 */
-	if (__mnt_is_readonly(nd.path.mnt))
+	if (__mnt_is_readonly(path.mnt))
 		res = -EROFS;
 
 out_path_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	current->fsuid = old_fsuid;
 	current->fsgid = old_fsgid;
@@ -510,21 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode)
 
 asmlinkage long sys_chdir(const char __user * filename)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	error = user_path_dir(filename, &path);
 	if (error)
 		goto out;
 
-	error = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
 	if (error)
 		goto dput_and_out;
 
-	set_fs_pwd(current->fs, &nd.path);
+	set_fs_pwd(current->fs, &path);
 
 dput_and_out:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
@@ -557,14 +557,14 @@ out:
 
 asmlinkage long sys_chroot(const char __user * filename)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+	error = user_path_dir(filename, &path);
 	if (error)
 		goto out;
 
-	error = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
 	if (error)
 		goto dput_and_out;
 
@@ -572,10 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename)
 	if (!capable(CAP_SYS_CHROOT))
 		goto dput_and_out;
 
-	set_fs_root(current->fs, &nd.path);
+	set_fs_root(current->fs, &path);
 	error = 0;
 dput_and_out:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
@@ -617,17 +617,17 @@ out:
 asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 			     mode_t mode)
 {
-	struct nameidata nd;
-	struct inode * inode;
+	struct path path;
+	struct inode *inode;
 	int error;
 	struct iattr newattrs;
 
-	error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd);
+	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
 	if (error)
 		goto out;
-	inode = nd.path.dentry->d_inode;
+	inode = path.dentry->d_inode;
 
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (error)
 		goto dput_and_out;
 	mutex_lock(&inode->i_mutex);
@@ -635,11 +635,11 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
 		mode = inode->i_mode;
 	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-	error = notify_change(nd.path.dentry, &newattrs);
+	error = notify_change(path.dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
-	mnt_drop_write(nd.path.mnt);
+	mnt_drop_write(path.mnt);
 dput_and_out:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
@@ -676,19 +676,19 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
 
 asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk(filename, &nd);
+	error = user_path(filename, &path);
 	if (error)
 		goto out;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(nd.path.dentry, user, group);
-	mnt_drop_write(nd.path.mnt);
+	error = chown_common(path.dentry, user, group);
+	mnt_drop_write(path.mnt);
 out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
@@ -696,7 +696,7 @@ out:
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 			     gid_t group, int flag)
 {
-	struct nameidata nd;
+	struct path path;
 	int error = -EINVAL;
 	int follow;
 
@@ -704,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
 		goto out;
 
 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
-	error = __user_walk_fd(dfd, filename, follow, &nd);
+	error = user_path_at(dfd, filename, follow, &path);
 	if (error)
 		goto out;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(nd.path.dentry, user, group);
-	mnt_drop_write(nd.path.mnt);
+	error = chown_common(path.dentry, user, group);
+	mnt_drop_write(path.mnt);
 out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
 
 asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk_link(filename, &nd);
+	error = user_lpath(filename, &path);
 	if (error)
 		goto out;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (error)
 		goto out_release;
-	error = chown_common(nd.path.dentry, user, group);
-	mnt_drop_write(nd.path.mnt);
+	error = chown_common(path.dentry, user, group);
+	mnt_drop_write(path.mnt);
 out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return error;
 }
diff --git a/fs/stat.c b/fs/stat.c
index 9cf41f719d5..7c46fbeb8b7 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
 
 int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd);
+	error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
 	if (!error) {
-		error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
-		path_put(&nd.path);
+		error = vfs_getattr(path.mnt, path.dentry, stat);
+		path_put(&path);
 	}
 	return error;
 }
@@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat);
 
 int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = __user_walk_fd(dfd, name, 0, &nd);
+	error = user_path_at(dfd, name, 0, &path);
 	if (!error) {
-		error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat);
-		path_put(&nd.path);
+		error = vfs_getattr(path.mnt, path.dentry, stat);
+		path_put(&path);
 	}
 	return error;
 }
@@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
 	return error;
 }
 
-asmlinkage long sys_readlinkat(int dfd, const char __user *path,
+asmlinkage long sys_readlinkat(int dfd, const char __user *pathname,
 				char __user *buf, int bufsiz)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
 	if (bufsiz <= 0)
 		return -EINVAL;
 
-	error = __user_walk_fd(dfd, path, 0, &nd);
+	error = user_path_at(dfd, pathname, 0, &path);
 	if (!error) {
-		struct inode *inode = nd.path.dentry->d_inode;
+		struct inode *inode = path.dentry->d_inode;
 
 		error = -EINVAL;
 		if (inode->i_op && inode->i_op->readlink) {
-			error = security_inode_readlink(nd.path.dentry);
+			error = security_inode_readlink(path.dentry);
 			if (!error) {
-				touch_atime(nd.path.mnt, nd.path.dentry);
-				error = inode->i_op->readlink(nd.path.dentry,
+				touch_atime(path.mnt, path.dentry);
+				error = inode->i_op->readlink(path.dentry,
 							      buf, bufsiz);
 			}
 		}
-		path_put(&nd.path);
+		path_put(&path);
 	}
 	return error;
 }
diff --git a/fs/utimes.c b/fs/utimes.c
index dc28b782625..6929e3e91d0 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -152,18 +152,18 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 		error = utimes_common(&file->f_path, times);
 		fput(file);
 	} else {
-		struct nameidata nd;
+		struct path path;
 		int lookup_flags = 0;
 
 		if (!(flags & AT_SYMLINK_NOFOLLOW))
 			lookup_flags |= LOOKUP_FOLLOW;
 
-		error = __user_walk_fd(dfd, filename, lookup_flags, &nd);
+		error = user_path_at(dfd, filename, lookup_flags, &path);
 		if (error)
 			goto out;
 
-		error = utimes_common(&nd.path, times);
-		path_put(&nd.path);
+		error = utimes_common(&path, times);
+		path_put(&path);
 	}
 
 out:
diff --git a/fs/xattr.c b/fs/xattr.c
index b96222e05ba..468377e6653 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
 }
 
 asmlinkage long
-sys_setxattr(const char __user *path, const char __user *name,
+sys_setxattr(const char __user *pathname, const char __user *name,
 	     const void __user *value, size_t size, int flags)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (error)
 		return error;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = setxattr(nd.path.dentry, name, value, size, flags);
-		mnt_drop_write(nd.path.mnt);
+		error = setxattr(path.dentry, name, value, size, flags);
+		mnt_drop_write(path.mnt);
 	}
-	path_put(&nd.path);
+	path_put(&path);
 	return error;
 }
 
 asmlinkage long
-sys_lsetxattr(const char __user *path, const char __user *name,
+sys_lsetxattr(const char __user *pathname, const char __user *name,
 	      const void __user *value, size_t size, int flags)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk_link(path, &nd);
+	error = user_lpath(pathname, &path);
 	if (error)
 		return error;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = setxattr(nd.path.dentry, name, value, size, flags);
-		mnt_drop_write(nd.path.mnt);
+		error = setxattr(path.dentry, name, value, size, flags);
+		mnt_drop_write(path.mnt);
 	}
-	path_put(&nd.path);
+	path_put(&path);
 	return error;
 }
 
@@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
 }
 
 asmlinkage ssize_t
-sys_getxattr(const char __user *path, const char __user *name,
+sys_getxattr(const char __user *pathname, const char __user *name,
 	     void __user *value, size_t size)
 {
-	struct nameidata nd;
+	struct path path;
 	ssize_t error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (error)
 		return error;
-	error = getxattr(nd.path.dentry, name, value, size);
-	path_put(&nd.path);
+	error = getxattr(path.dentry, name, value, size);
+	path_put(&path);
 	return error;
 }
 
 asmlinkage ssize_t
-sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
+sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value,
 	      size_t size)
 {
-	struct nameidata nd;
+	struct path path;
 	ssize_t error;
 
-	error = user_path_walk_link(path, &nd);
+	error = user_lpath(pathname, &path);
 	if (error)
 		return error;
-	error = getxattr(nd.path.dentry, name, value, size);
-	path_put(&nd.path);
+	error = getxattr(path.dentry, name, value, size);
+	path_put(&path);
 	return error;
 }
 
@@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size)
 }
 
 asmlinkage ssize_t
-sys_listxattr(const char __user *path, char __user *list, size_t size)
+sys_listxattr(const char __user *pathname, char __user *list, size_t size)
 {
-	struct nameidata nd;
+	struct path path;
 	ssize_t error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (error)
 		return error;
-	error = listxattr(nd.path.dentry, list, size);
-	path_put(&nd.path);
+	error = listxattr(path.dentry, list, size);
+	path_put(&path);
 	return error;
 }
 
 asmlinkage ssize_t
-sys_llistxattr(const char __user *path, char __user *list, size_t size)
+sys_llistxattr(const char __user *pathname, char __user *list, size_t size)
 {
-	struct nameidata nd;
+	struct path path;
 	ssize_t error;
 
-	error = user_path_walk_link(path, &nd);
+	error = user_lpath(pathname, &path);
 	if (error)
 		return error;
-	error = listxattr(nd.path.dentry, list, size);
-	path_put(&nd.path);
+	error = listxattr(path.dentry, list, size);
+	path_put(&path);
 	return error;
 }
 
@@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name)
 }
 
 asmlinkage long
-sys_removexattr(const char __user *path, const char __user *name)
+sys_removexattr(const char __user *pathname, const char __user *name)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk(path, &nd);
+	error = user_path(pathname, &path);
 	if (error)
 		return error;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = removexattr(nd.path.dentry, name);
-		mnt_drop_write(nd.path.mnt);
+		error = removexattr(path.dentry, name);
+		mnt_drop_write(path.mnt);
 	}
-	path_put(&nd.path);
+	path_put(&path);
 	return error;
 }
 
 asmlinkage long
-sys_lremovexattr(const char __user *path, const char __user *name)
+sys_lremovexattr(const char __user *pathname, const char __user *name)
 {
-	struct nameidata nd;
+	struct path path;
 	int error;
 
-	error = user_path_walk_link(path, &nd);
+	error = user_lpath(pathname, &path);
 	if (error)
 		return error;
-	error = mnt_want_write(nd.path.mnt);
+	error = mnt_want_write(path.mnt);
 	if (!error) {
-		error = removexattr(nd.path.dentry, name);
-		mnt_drop_write(nd.path.mnt);
+		error = removexattr(path.dentry, name);
+		mnt_drop_write(path.mnt);
 	}
-	path_put(&nd.path);
+	path_put(&path);
 	return error;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a42ba9d7115..01939ba2d8d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -84,17 +84,15 @@ xfs_find_handle(
 	switch (cmd) {
 	case XFS_IOC_PATH_TO_FSHANDLE:
 	case XFS_IOC_PATH_TO_HANDLE: {
-		struct nameidata	nd;
-		int			error;
-
-		error = user_path_walk_link((const char __user *)hreq.path, &nd);
+		struct path path;
+		int error = user_lpath((const char __user *)hreq.path, &path);
 		if (error)
 			return error;
 
-		ASSERT(nd.path.dentry);
-		ASSERT(nd.path.dentry->d_inode);
-		inode = igrab(nd.path.dentry->d_inode);
-		path_put(&nd.path);
+		ASSERT(path.dentry);
+		ASSERT(path.dentry->d_inode);
+		inode = igrab(path.dentry->d_inode);
+		path_put(&path);
 		break;
 	}
 
-- 
cgit v1.2.3


From 2ad94ae654f5eb72fd3260b706aea645cf4a7791 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 21 Jul 2008 09:32:51 -0400
Subject: [PATCH] new (local) helper: user_path_parent()

Preparation to untangling intents mess: reduce the number of do_path_lookup()
callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 137 +++++++++++++++++++++++++------------------------------------
 1 file changed, 57 insertions(+), 80 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index edb5e973f9b..38ceb6e06eb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1352,6 +1352,24 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
 	return err;
 }
 
+static int user_path_parent(int dfd, const char __user *path,
+			struct nameidata *nd, char **name)
+{
+	char *s = getname(path);
+	int error;
+
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
+	if (error)
+		putname(s);
+	else
+		*name = s;
+
+	return error;
+}
+
 /*
  * It's inline, so penalty for filesystems that don't use sticky bit is
  * minimal.
@@ -1989,20 +2007,18 @@ static int may_mknod(mode_t mode)
 asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 				unsigned dev)
 {
-	int error = 0;
-	char * tmp;
-	struct dentry * dentry;
+	int error;
+	char *tmp;
+	struct dentry *dentry;
 	struct nameidata nd;
 
 	if (S_ISDIR(mode))
 		return -EPERM;
-	tmp = getname(filename);
-	if (IS_ERR(tmp))
-		return PTR_ERR(tmp);
 
-	error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
+	error = user_path_parent(dfd, filename, &nd, &tmp);
 	if (error)
-		goto out;
+		return error;
+
 	dentry = lookup_create(&nd, 0);
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
@@ -2034,7 +2050,6 @@ out_dput:
 out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_put(&nd.path);
-out:
 	putname(tmp);
 
 	return error;
@@ -2074,14 +2089,10 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 	struct dentry *dentry;
 	struct nameidata nd;
 
-	tmp = getname(pathname);
-	error = PTR_ERR(tmp);
-	if (IS_ERR(tmp))
+	error = user_path_parent(dfd, pathname, &nd, &tmp);
+	if (error)
 		goto out_err;
 
-	error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
-	if (error)
-		goto out;
 	dentry = lookup_create(&nd, 1);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
@@ -2099,7 +2110,6 @@ out_dput:
 out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_put(&nd.path);
-out:
 	putname(tmp);
 out_err:
 	return error;
@@ -2177,13 +2187,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
 	struct dentry *dentry;
 	struct nameidata nd;
 
-	name = getname(pathname);
-	if(IS_ERR(name))
-		return PTR_ERR(name);
-
-	error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
+	error = user_path_parent(dfd, pathname, &nd, &name);
 	if (error)
-		goto exit;
+		return error;
 
 	switch(nd.last_type) {
 		case LAST_DOTDOT:
@@ -2212,7 +2218,6 @@ exit2:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 exit1:
 	path_put(&nd.path);
-exit:
 	putname(name);
 	return error;
 }
@@ -2261,19 +2266,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
  */
 static long do_unlinkat(int dfd, const char __user *pathname)
 {
-	int error = 0;
-	char * name;
+	int error;
+	char *name;
 	struct dentry *dentry;
 	struct nameidata nd;
 	struct inode *inode = NULL;
 
-	name = getname(pathname);
-	if(IS_ERR(name))
-		return PTR_ERR(name);
-
-	error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
+	error = user_path_parent(dfd, pathname, &nd, &name);
 	if (error)
-		goto exit;
+		return error;
+
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
@@ -2300,7 +2302,6 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 		iput(inode);	/* truncate the inode here */
 exit1:
 	path_put(&nd.path);
-exit:
 	putname(name);
 	return error;
 
@@ -2350,23 +2351,20 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 asmlinkage long sys_symlinkat(const char __user *oldname,
 			      int newdfd, const char __user *newname)
 {
-	int error = 0;
-	char * from;
-	char * to;
+	int error;
+	char *from;
+	char *to;
 	struct dentry *dentry;
 	struct nameidata nd;
 
 	from = getname(oldname);
-	if(IS_ERR(from))
+	if (IS_ERR(from))
 		return PTR_ERR(from);
-	to = getname(newname);
-	error = PTR_ERR(to);
-	if (IS_ERR(to))
-		goto out_putname;
 
-	error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+	error = user_path_parent(newdfd, newname, &nd, &to);
 	if (error)
-		goto out;
+		goto out_putname;
+
 	dentry = lookup_create(&nd, 0);
 	error = PTR_ERR(dentry);
 	if (IS_ERR(dentry))
@@ -2382,7 +2380,6 @@ out_dput:
 out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 	path_put(&nd.path);
-out:
 	putname(to);
 out_putname:
 	putname(from);
@@ -2449,21 +2446,18 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	struct nameidata nd;
 	struct path old_path;
 	int error;
-	char * to;
+	char *to;
 
 	if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
 		return -EINVAL;
 
-	to = getname(newname);
-	if (IS_ERR(to))
-		return PTR_ERR(to);
-
 	error = user_path_at(olddfd, oldname,
 			     flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
 			     &old_path);
 	if (error)
-		goto exit;
-	error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
+		return error;
+
+	error = user_path_parent(newdfd, newname, &nd, &to);
 	if (error)
 		goto out;
 	error = -EXDEV;
@@ -2484,10 +2478,9 @@ out_unlock:
 	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 out_release:
 	path_put(&nd.path);
+	putname(to);
 out:
 	path_put(&old_path);
-exit:
-	putname(to);
 
 	return error;
 }
@@ -2643,20 +2636,22 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
-static int do_rename(int olddfd, const char *oldname,
-			int newdfd, const char *newname)
+asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
+			     int newdfd, const char __user *newname)
 {
-	int error = 0;
-	struct dentry * old_dir, * new_dir;
-	struct dentry * old_dentry, *new_dentry;
-	struct dentry * trap;
+	struct dentry *old_dir, *new_dir;
+	struct dentry *old_dentry, *new_dentry;
+	struct dentry *trap;
 	struct nameidata oldnd, newnd;
+	char *from;
+	char *to;
+	int error;
 
-	error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd);
+	error = user_path_parent(olddfd, oldname, &oldnd, &from);
 	if (error)
 		goto exit;
 
-	error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd);
+	error = user_path_parent(newdfd, newname, &newnd, &to);
 	if (error)
 		goto exit1;
 
@@ -2718,29 +2713,11 @@ exit3:
 	unlock_rename(new_dir, old_dir);
 exit2:
 	path_put(&newnd.path);
+	putname(to);
 exit1:
 	path_put(&oldnd.path);
-exit:
-	return error;
-}
-
-asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
-			     int newdfd, const char __user *newname)
-{
-	int error;
-	char * from;
-	char * to;
-
-	from = getname(oldname);
-	if(IS_ERR(from))
-		return PTR_ERR(from);
-	to = getname(newname);
-	error = PTR_ERR(to);
-	if (!IS_ERR(to)) {
-		error = do_rename(olddfd, from, newdfd, to);
-		putname(to);
-	}
 	putname(from);
+exit:
 	return error;
 }
 
-- 
cgit v1.2.3


From a569c711f63995ad80c23918525111e0cdb0bc73 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 23 Jul 2008 14:42:05 -0400
Subject: [PATCH] don't pass nameidata to gfs2_lookupi()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/inode.c      | 6 +++---
 fs/gfs2/inode.h      | 2 +-
 fs/gfs2/ops_export.c | 2 +-
 fs/gfs2/ops_inode.c  | 4 ++--
 fs/gfs2/super.c      | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6da0ab355b8..8b0806a3294 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
 	struct qstr qstr;
 	struct inode *inode;
 	gfs2_str2qstr(&qstr, name);
-	inode = gfs2_lookupi(dip, &qstr, 1, NULL);
+	inode = gfs2_lookupi(dip, &qstr, 1);
 	/* gfs2_lookupi has inconsistent callers: vfs
 	 * related routines expect NULL for no entry found,
 	 * gfs2_lookup_simple callers expect ENOENT
@@ -477,7 +477,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
  */
 
 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
-			   int is_root, struct nameidata *nd)
+			   int is_root)
 {
 	struct super_block *sb = dir->i_sb;
 	struct gfs2_inode *dip = GFS2_I(dir);
@@ -1173,7 +1173,7 @@ int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
 			break;
 		}
 
-		tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
+		tmp = gfs2_lookupi(dir, &dotdot, 1);
 		if (IS_ERR(tmp)) {
 			error = PTR_ERR(tmp);
 			break;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6074c2506f7..58f9607d6a8 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,7 +83,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip);
 int gfs2_dinode_dealloc(struct gfs2_inode *inode);
 int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
-			   int is_root, struct nameidata *nd);
+			   int is_root);
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 			   unsigned int mode, dev_t dev);
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 990d9f4bc46..9cda8536530 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 	struct dentry *dentry;
 
 	gfs2_str2qstr(&dotdot, "..");
-	inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
+	inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
 
 	if (!inode)
 		return ERR_PTR(-ENOENT);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 4e982532f08..e2c62f73a77 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
 			return PTR_ERR(inode);
 		}
 
-		inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
+		inode = gfs2_lookupi(dir, &dentry->d_name, 0);
 		if (inode) {
 			if (!IS_ERR(inode)) {
 				gfs2_holder_uninit(ghs);
@@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
 
 	dentry->d_op = &gfs2_dops;
 
-	inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
+	inode = gfs2_lookupi(dir, &dentry->d_name, 0);
 	if (inode && IS_ERR(inode))
 		return ERR_CAST(inode);
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 63a8a902d9d..ca831991cbc 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -389,7 +389,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 			break;
 
 		INIT_LIST_HEAD(&jd->extent_list);
-		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
+		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
 		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
 			if (!jd->jd_inode)
 				error = -ENOENT;
-- 
cgit v1.2.3


From 58ec42b061bf5dad8fa0370a19966cfd96eaf80c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 23 Jul 2008 14:45:55 -0400
Subject: [PATCH] don't pass nameidata to __ncp_lookup_validate()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ncpfs/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 011ef0b6d2d..07e9715b865 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -266,7 +266,7 @@ leave_me:;
 
 
 static int
-__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
+__ncp_lookup_validate(struct dentry *dentry)
 {
 	struct ncp_server *server;
 	struct dentry *parent;
@@ -340,7 +340,7 @@ ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
 {
 	int res;
 	lock_kernel();
-	res = __ncp_lookup_validate(dentry, nd);
+	res = __ncp_lookup_validate(dentry);
 	unlock_kernel();
 	return res;
 }
-- 
cgit v1.2.3


From 3c333937ee3be114b181c4861188cfe8f6a59697 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Fri, 25 Jul 2008 22:32:13 -0400
Subject: [PATCH] dup3 fix

Al Viro notice one cornercase that the new dup3() code.  The dup2()
function, as a special case, handles dup-ing to the same file
descriptor.  In this case the current dup3() code does nothing at
all.  I.e., it ingnores the flags parameter.  This shouldn't happen,
the close-on-exec flag should be set if requested.

In case the O_CLOEXEC bit in the flags parameter is not set the
dup3() function should behave in this respect identical to dup2().
This means dup3(fd, fd, 0) should not actively reset the c-o-e
flag.

The patch below implements this minor change.

[AV: credits to Artur Grabowski for bringing that up as potential subtle point
in dup2() behaviour]

Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9679fcbdeaa..ce12a688511 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -139,8 +139,13 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 	if (!(file = fcheck(oldfd)))
 		goto out_unlock;
 	err = newfd;
-	if (newfd == oldfd)
+	if (unlikely(newfd == oldfd)) {
+		if (flags & O_CLOEXEC) {
+			fdt = files_fdtable(files);
+			FD_SET(newfd, fdt->close_on_exec);
+		}
 		goto out_unlock;
+	}
 	err = -EBADF;
 	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 		goto out_unlock;
-- 
cgit v1.2.3


From 516e0cc5646f377ab80fcc2ee639892eccb99853 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 Jul 2008 00:39:17 -0400
Subject: [PATCH] f_count may wrap around

make it atomic_long_t; while we are at it, get rid of useless checks in affs,
hfs and hpfs - ->open() always has it equal to 1, ->release() - to 0.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/file.c     |  4 ----
 fs/aio.c           |  6 +++---
 fs/file_table.c    | 10 +++++-----
 fs/hfs/inode.c     |  4 ----
 fs/hfsplus/inode.c |  4 ----
 5 files changed, 8 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6eac7bdeec9..1377b1240b6 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = {
 static int
 affs_file_open(struct inode *inode, struct file *filp)
 {
-	if (atomic_read(&filp->f_count) != 1)
-		return 0;
 	pr_debug("AFFS: open(%lu,%d)\n",
 		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 	atomic_inc(&AFFS_I(inode)->i_opencnt);
@@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp)
 static int
 affs_file_release(struct inode *inode, struct file *filp)
 {
-	if (atomic_read(&filp->f_count) != 0)
-		return 0;
 	pr_debug("AFFS: release(%lu, %d)\n",
 		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
 
diff --git a/fs/aio.c b/fs/aio.c
index 0051fd94b44..f658441d566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data)
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
-	dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n",
-		req, atomic_read(&req->ki_filp->f_count));
+	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
+		req, atomic_long_read(&req->ki_filp->f_count));
 
 	assert_spin_locked(&ctx->ctx_lock);
 
@@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	/* Must be done under the lock to serialise against cancellation.
 	 * Call this aio_fput as it duplicates fput via the fput_work.
 	 */
-	if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
diff --git a/fs/file_table.c b/fs/file_table.c
index 83084225b4c..f45a4493f9e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -120,7 +120,7 @@ struct file *get_empty_filp(void)
 
 	tsk = current;
 	INIT_LIST_HEAD(&f->f_u.fu_list);
-	atomic_set(&f->f_count, 1);
+	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	f->f_uid = tsk->fsuid;
 	f->f_gid = tsk->fsgid;
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file);
 
 void fput(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count))
+	if (atomic_long_dec_and_test(&file->f_count))
 		__fput(file);
 }
 
@@ -294,7 +294,7 @@ struct file *fget(unsigned int fd)
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (file) {
-		if (!atomic_inc_not_zero(&file->f_count)) {
+		if (!atomic_long_inc_not_zero(&file->f_count)) {
 			/* File object ref couldn't be taken */
 			rcu_read_unlock();
 			return NULL;
@@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 		rcu_read_lock();
 		file = fcheck_files(files, fd);
 		if (file) {
-			if (atomic_inc_not_zero(&file->f_count))
+			if (atomic_long_inc_not_zero(&file->f_count))
 				*fput_needed = 1;
 			else
 				/* Didn't get the reference, someone's freed */
@@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 
 void put_filp(struct file *file)
 {
-	if (atomic_dec_and_test(&file->f_count)) {
+	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
 		file_kill(file);
 		file_free(file);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index aa73f3fd5dd..7e19835efa2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -522,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file)
 {
 	if (HFS_IS_RSRC(inode))
 		inode = HFS_I(inode)->rsrc_inode;
-	if (atomic_read(&file->f_count) != 1)
-		return 0;
 	atomic_inc(&HFS_I(inode)->opencnt);
 	return 0;
 }
@@ -534,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file)
 
 	if (HFS_IS_RSRC(inode))
 		inode = HFS_I(inode)->rsrc_inode;
-	if (atomic_read(&file->f_count) != 0)
-		return 0;
 	if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
 		mutex_lock(&inode->i_mutex);
 		hfs_file_truncate(inode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d4014e3044d..b085d64a2b6 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
 {
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
-	if (atomic_read(&file->f_count) != 1)
-		return 0;
 	atomic_inc(&HFSPLUS_I(inode).opencnt);
 	return 0;
 }
@@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 
 	if (HFSPLUS_IS_RSRC(inode))
 		inode = HFSPLUS_I(inode).rsrc_inode;
-	if (atomic_read(&file->f_count) != 0)
-		return 0;
 	if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
 		mutex_lock(&inode->i_mutex);
 		hfsplus_file_truncate(inode);
-- 
cgit v1.2.3


From 964bd183624c03680796b63b4ab97ee3905a806a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 Jul 2008 03:33:14 -0400
Subject: [PATCH] get rid of __user_path_lookup_open

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c  | 14 ++++++++++----
 fs/namei.c | 13 -------------
 2 files changed, 10 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index eca58c29ede..9696bbf0f0b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -106,11 +106,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
  */
 asmlinkage long sys_uselib(const char __user * library)
 {
-	struct file * file;
+	struct file *file;
 	struct nameidata nd;
-	int error;
-
-	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC);
+	char *tmp = getname(library);
+	int error = PTR_ERR(tmp);
+
+	if (!IS_ERR(tmp)) {
+		error = path_lookup_open(AT_FDCWD, tmp,
+					 LOOKUP_FOLLOW, &nd,
+					 FMODE_READ|FMODE_EXEC);
+		putname(tmp);
+	}
 	if (error)
 		goto out;
 
diff --git a/fs/namei.c b/fs/namei.c
index 38ceb6e06eb..a7b0a0b8012 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1193,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name,
 			nd, open_flags, create_mode);
 }
 
-int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
-		struct nameidata *nd, int open_flags)
-{
-	char *tmp = getname(name);
-	int err = PTR_ERR(tmp);
-
-	if (!IS_ERR(tmp)) {
-		err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
-		putname(tmp);
-	}
-	return err;
-}
-
 static struct dentry *__lookup_hash(struct qstr *name,
 		struct dentry *base, struct nameidata *nd)
 {
-- 
cgit v1.2.3


From 3f8206d496e9e9495afb1d4e70d29712b4d403c9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 Jul 2008 03:46:43 -0400
Subject: [PATCH] get rid of indirect users of namei.h

fs.h needs path.h, not namei.h; nfs_fs.h doesn't need it at all.
Several places in the tree needed direct include.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfsctl.c | 1 +
 fs/ubifs/file.c  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1955a2702e6..c53e65f8f3a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/fcntl.h>
 #include <linux/net.h>
 #include <linux/in.h>
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 005a3b854d9..8565e586e53 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -53,6 +53,7 @@
 
 #include "ubifs.h"
 #include <linux/mount.h>
+#include <linux/namei.h>
 
 static int read_block(struct inode *inode, void *addr, unsigned int block,
 		      struct ubifs_data_node *dn)
-- 
cgit v1.2.3


From 6c5d0512a091480c9f981162227fdb1c9d70e555 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 Jul 2008 13:38:19 -0400
Subject: [PATCH] get rid of corner case in dup3() entirely

Since Ulrich is OK with getting rid of dup3(fd, fd, flags) completely,
to hell the damn thing goes.  Corner case for dup2() is handled in
sys_dup2() (complete with -EBADF if dup2(fd, fd) is called with fd
that is not open), the rest is done in dup3().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index ce12a688511..3deec988708 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -135,18 +135,12 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 	if ((flags & ~O_CLOEXEC) != 0)
 		return -EINVAL;
 
+	if (unlikely(oldfd == newfd))
+		return -EINVAL;
+
 	spin_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
 		goto out_unlock;
-	err = newfd;
-	if (unlikely(newfd == oldfd)) {
-		if (flags & O_CLOEXEC) {
-			fdt = files_fdtable(files);
-			FD_SET(newfd, fdt->close_on_exec);
-		}
-		goto out_unlock;
-	}
-	err = -EBADF;
 	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
 		goto out_unlock;
 	get_file(file);			/* We are now finished with oldfd */
@@ -194,6 +188,14 @@ out_fput:
 
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 {
+	if (unlikely(newfd == oldfd)) { /* corner case */
+		struct files_struct *files = current->files;
+		rcu_read_lock();
+		if (!fcheck_files(files, oldfd))
+			oldfd = -EBADF;
+		rcu_read_unlock();
+		return oldfd;
+	}
 	return sys_dup3(oldfd, newfd, 0);
 }
 
-- 
cgit v1.2.3


From 4e1e018ecc6f7bfd10fc75b3ff9715cc8164e0a2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 Jul 2008 16:01:20 -0400
Subject: [PATCH] fix RLIM_NOFILE handling

* dup2() should return -EBADF on exceeded sysctl_nr_open
* dup() should *not* return -EINVAL even if you have rlimit set to 0;
  it should get -EMFILE instead.

Check for orig_start exceeding rlimit taken to sys_fcntl().
Failing expand_files() in dup{2,3}() now gets -EMFILE remapped to -EBADF.
Consequently, remaining checks for rlimit are taken to expand_files().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c | 18 ++++++------------
 fs/file.c  |  9 +++++++++
 fs/open.c  |  9 ---------
 3 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3deec988708..61d62513681 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -64,11 +64,6 @@ static int locate_fd(unsigned int orig_start, int cloexec)
 	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
-
-	error = -EINVAL;
-	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-		goto out;
-
 repeat:
 	fdt = files_fdtable(files);
 	/*
@@ -83,10 +78,6 @@ repeat:
 	if (start < fdt->max_fds)
 		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
 					   fdt->max_fds, start);
-	
-	error = -EMFILE;
-	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-		goto out;
 
 	error = expand_files(files, newfd);
 	if (error < 0)
@@ -141,13 +132,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 	spin_lock(&files->file_lock);
 	if (!(file = fcheck(oldfd)))
 		goto out_unlock;
-	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-		goto out_unlock;
 	get_file(file);			/* We are now finished with oldfd */
 
 	err = expand_files(files, newfd);
-	if (err < 0)
+	if (unlikely(err < 0)) {
+		if (err == -EMFILE)
+			err = -EBADF;
 		goto out_fput;
+	}
 
 	/* To avoid races with open() and dup(), we will mark the fd as
 	 * in-use in the open-file bitmap throughout the entire dup2()
@@ -328,6 +320,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	switch (cmd) {
 	case F_DUPFD:
 	case F_DUPFD_CLOEXEC:
+		if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+			break;
 		get_file(filp);
 		err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
 		break;
diff --git a/fs/file.c b/fs/file.c
index 7b3887e054d..d8773b19fe4 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -250,9 +250,18 @@ int expand_files(struct files_struct *files, int nr)
 	struct fdtable *fdt;
 
 	fdt = files_fdtable(files);
+
+	/*
+	 * N.B. For clone tasks sharing a files structure, this test
+	 * will limit the total number of files that can be opened.
+	 */
+	if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
+		return -EMFILE;
+
 	/* Do we need to expand? */
 	if (nr < fdt->max_fds)
 		return 0;
+
 	/* Can we expand? */
 	if (nr >= sysctl_nr_open)
 		return -EMFILE;
diff --git a/fs/open.c b/fs/open.c
index 3fe1a6857c7..52647be277a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -972,7 +972,6 @@ int get_unused_fd_flags(int flags)
 	int fd, error;
 	struct fdtable *fdt;
 
-  	error = -EMFILE;
 	spin_lock(&files->file_lock);
 
 repeat:
@@ -980,13 +979,6 @@ repeat:
 	fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
 				files->next_fd);
 
-	/*
-	 * N.B. For clone tasks sharing a files structure, this test
-	 * will limit the total number of files that can be opened.
-	 */
-	if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-		goto out;
-
 	/* Do we need to expand the fd array or fd set?  */
 	error = expand_files(files, fd);
 	if (error < 0)
@@ -997,7 +989,6 @@ repeat:
 	 	 * If we needed to expand the fs array we
 		 * might have blocked - try again.
 		 */
-		error = -EMFILE;
 		goto repeat;
 	}
 
-- 
cgit v1.2.3