aboutsummaryrefslogtreecommitdiff
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c275
1 files changed, 209 insertions, 66 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 8cf9bb9c2fc..e179f71bfcb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
return -EACCES;
flag &= ~O_TRUNC;
- } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
- return -EROFS;
+ }
error = vfs_permission(nd, acc_mode);
if (error)
@@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
return 0;
}
-static int open_namei_create(struct nameidata *nd, struct path *path,
+/*
+ * Be careful about ever adding any more callers of this
+ * function. Its flags must be in the namei format, not
+ * what get passed to sys_open().
+ */
+static int __open_namei_create(struct nameidata *nd, struct path *path,
int flag, int mode)
{
int error;
@@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
}
/*
- * open_namei()
+ * Note that while the flag value (low two bits) for sys_open means:
+ * 00 - read-only
+ * 01 - write-only
+ * 10 - read-write
+ * 11 - special
+ * it is changed into
+ * 00 - no permissions needed
+ * 01 - read-permission
+ * 10 - write-permission
+ * 11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
*
- * namei for open - this is in fact almost the whole open-routine.
- *
- * Note that the low bits of "flag" aren't the same as in the open
- * system call - they are 00 - no permissions needed
- * 01 - read permission needed
- * 10 - write permission needed
- * 11 - read/write permissions needed
- * which is a lot more logical, and also allows the "no perm" needed
- * for symlinks (where the permissions are checked later).
- * SMP-safe
+*/
+static inline int open_to_namei_flags(int flag)
+{
+ if ((flag+1) & O_ACCMODE)
+ flag++;
+ return flag;
+}
+
+static int open_will_write_to_fs(int flag, struct inode *inode)
+{
+ /*
+ * We'll never write to the fs underlying
+ * a device file.
+ */
+ if (special_file(inode->i_mode))
+ return 0;
+ return (flag & O_TRUNC);
+}
+
+/*
+ * Note that the low bits of the passed in "open_flag"
+ * are not the same as in the local variable "flag". See
+ * open_to_namei_flags() for more details.
*/
-int open_namei(int dfd, const char *pathname, int flag,
- int mode, struct nameidata *nd)
+struct file *do_filp_open(int dfd, const char *pathname,
+ int open_flag, int mode)
{
+ struct file *filp;
+ struct nameidata nd;
int acc_mode, error;
struct path path;
struct dentry *dir;
int count = 0;
+ int will_write;
+ int flag = open_to_namei_flags(open_flag);
acc_mode = ACC_MODE(flag);
@@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag,
*/
if (!(flag & O_CREAT)) {
error = path_lookup_open(dfd, pathname, lookup_flags(flag),
- nd, flag);
+ &nd, flag);
if (error)
- return error;
+ return ERR_PTR(error);
goto ok;
}
/*
* Create - we need to know the parent.
*/
- error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
+ error = path_lookup_create(dfd, pathname, LOOKUP_PARENT,
+ &nd, flag, mode);
if (error)
- return error;
+ return ERR_PTR(error);
/*
* We have the parent and last component. First of all, check
@@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag,
* will not do.
*/
error = -EISDIR;
- if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
+ if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
goto exit;
- dir = nd->path.dentry;
- nd->flags &= ~LOOKUP_PARENT;
+ dir = nd.path.dentry;
+ nd.flags &= ~LOOKUP_PARENT;
mutex_lock(&dir->d_inode->i_mutex);
- path.dentry = lookup_hash(nd);
- path.mnt = nd->path.mnt;
+ path.dentry = lookup_hash(&nd);
+ path.mnt = nd.path.mnt;
do_last:
error = PTR_ERR(path.dentry);
@@ -1768,18 +1803,31 @@ do_last:
goto exit;
}
- if (IS_ERR(nd->intent.open.file)) {
- mutex_unlock(&dir->d_inode->i_mutex);
- error = PTR_ERR(nd->intent.open.file);
- goto exit_dput;
+ if (IS_ERR(nd.intent.open.file)) {
+ error = PTR_ERR(nd.intent.open.file);
+ goto exit_mutex_unlock;
}
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
- error = open_namei_create(nd, &path, flag, mode);
+ /*
+ * This write is needed to ensure that a
+ * ro->rw transition does not occur between
+ * the time when the file is created and when
+ * a permanent write count is taken through
+ * the 'struct file' in nameidata_to_filp().
+ */
+ error = mnt_want_write(nd.path.mnt);
if (error)
+ goto exit_mutex_unlock;
+ error = __open_namei_create(&nd, &path, flag, mode);
+ if (error) {
+ mnt_drop_write(nd.path.mnt);
goto exit;
- return 0;
+ }
+ filp = nameidata_to_filp(&nd, open_flag);
+ mnt_drop_write(nd.path.mnt);
+ return filp;
}
/*
@@ -1804,23 +1852,52 @@ do_last:
if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
goto do_link;
- path_to_nameidata(&path, nd);
+ path_to_nameidata(&path, &nd);
error = -EISDIR;
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
- error = may_open(nd, acc_mode, flag);
- if (error)
+ /*
+ * Consider:
+ * 1. may_open() truncates a file
+ * 2. a rw->ro mount transition occurs
+ * 3. nameidata_to_filp() fails due to
+ * the ro mount.
+ * That would be inconsistent, and should
+ * be avoided. Taking this mnt write here
+ * ensures that (2) can not occur.
+ */
+ will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
+ if (will_write) {
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit;
+ }
+ error = may_open(&nd, acc_mode, flag);
+ if (error) {
+ if (will_write)
+ mnt_drop_write(nd.path.mnt);
goto exit;
- return 0;
+ }
+ filp = nameidata_to_filp(&nd, open_flag);
+ /*
+ * It is now safe to drop the mnt write
+ * because the filp has had a write taken
+ * on its behalf.
+ */
+ if (will_write)
+ mnt_drop_write(nd.path.mnt);
+ return filp;
+exit_mutex_unlock:
+ mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
- path_put_conditional(&path, nd);
+ path_put_conditional(&path, &nd);
exit:
- if (!IS_ERR(nd->intent.open.file))
- release_open_intent(nd);
- path_put(&nd->path);
- return error;
+ if (!IS_ERR(nd.intent.open.file))
+ release_open_intent(&nd);
+ path_put(&nd.path);
+ return ERR_PTR(error);
do_link:
error = -ELOOP;
@@ -1836,43 +1913,60 @@ do_link:
* stored in nd->last.name and we will have to putname() it when we
* are done. Procfs-like symlinks just set LAST_BIND.
*/
- nd->flags |= LOOKUP_PARENT;
- error = security_inode_follow_link(path.dentry, nd);
+ nd.flags |= LOOKUP_PARENT;
+ error = security_inode_follow_link(path.dentry, &nd);
if (error)
goto exit_dput;
- error = __do_follow_link(&path, nd);
+ error = __do_follow_link(&path, &nd);
if (error) {
/* Does someone understand code flow here? Or it is only
* me so stupid? Anathema to whoever designed this non-sense
* with "intent.open".
*/
- release_open_intent(nd);
- return error;
+ release_open_intent(&nd);
+ return ERR_PTR(error);
}
- nd->flags &= ~LOOKUP_PARENT;
- if (nd->last_type == LAST_BIND)
+ nd.flags &= ~LOOKUP_PARENT;
+ if (nd.last_type == LAST_BIND)
goto ok;
error = -EISDIR;
- if (nd->last_type != LAST_NORM)
+ if (nd.last_type != LAST_NORM)
goto exit;
- if (nd->last.name[nd->last.len]) {
- __putname(nd->last.name);
+ if (nd.last.name[nd.last.len]) {
+ __putname(nd.last.name);
goto exit;
}
error = -ELOOP;
if (count++==32) {
- __putname(nd->last.name);
+ __putname(nd.last.name);
goto exit;
}
- dir = nd->path.dentry;
+ dir = nd.path.dentry;
mutex_lock(&dir->d_inode->i_mutex);
- path.dentry = lookup_hash(nd);
- path.mnt = nd->path.mnt;
- __putname(nd->last.name);
+ path.dentry = lookup_hash(&nd);
+ path.mnt = nd.path.mnt;
+ __putname(nd.last.name);
goto do_last;
}
/**
+ * filp_open - open file and return file pointer
+ *
+ * @filename: path to open
+ * @flags: open flags as per the open(2) second argument
+ * @mode: mode for the new file if O_CREAT is set, else ignored
+ *
+ * This is the helper to open a file from kernelspace if you really
+ * have to. But in generally you should not do this, so please move
+ * along, nothing to see here..
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+ return do_filp_open(AT_FDCWD, filename, flags, mode);
+}
+EXPORT_SYMBOL(filp_open);
+
+/**
* lookup_create - lookup a dentry, creating it if it doesn't exist
* @nd: nameidata info
* @is_dir: directory flag
@@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
return error;
}
+static int may_mknod(mode_t mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ case 0: /* zero mode translates to S_IFREG */
+ return 0;
+ case S_IFDIR:
+ return -EPERM;
+ default:
+ return -EINVAL;
+ }
+}
+
asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
unsigned dev)
{
@@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
if (error)
goto out;
dentry = lookup_create(&nd, 0);
- error = PTR_ERR(dentry);
-
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto out_unlock;
+ }
if (!IS_POSIXACL(nd.path.dentry->d_inode))
mode &= ~current->fs->umask;
- if (!IS_ERR(dentry)) {
- switch (mode & S_IFMT) {
+ error = may_mknod(mode);
+ if (error)
+ goto out_dput;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
+ switch (mode & S_IFMT) {
case 0: case S_IFREG:
error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
break;
@@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
case S_IFIFO: case S_IFSOCK:
error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
break;
- case S_IFDIR:
- error = -EPERM;
- break;
- default:
- error = -EINVAL;
- }
- dput(dentry);
}
+ mnt_drop_write(nd.path.mnt);
+out_dput:
+ dput(dentry);
+out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
path_put(&nd.path);
out:
@@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
if (!IS_POSIXACL(nd.path.dentry->d_inode))
mode &= ~current->fs->umask;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+ mnt_drop_write(nd.path.mnt);
+out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname)
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit2;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit3;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+ mnt_drop_write(nd.path.mnt);
+exit3:
dput(dentry);
exit2:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
inode = dentry->d_inode;
if (inode)
atomic_inc(&inode->i_count);
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit2;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+ mnt_drop_write(nd.path.mnt);
exit2:
dput(dentry);
}
@@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
if (IS_ERR(dentry))
goto out_unlock;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
+ mnt_drop_write(nd.path.mnt);
+out_dput:
dput(dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
goto out_unlock;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto out_dput;
error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
+ mnt_drop_write(nd.path.mnt);
+out_dput:
dput(new_dentry);
out_unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname,
if (new_dentry == trap)
goto exit5;
+ error = mnt_want_write(oldnd.path.mnt);
+ if (error)
+ goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
+ mnt_drop_write(oldnd.path.mnt);
exit5:
dput(new_dentry);
exit4: