From f52111b1546943545e67573c4dde1c7613ca33d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 18:19:16 -0400 Subject: [PATCH] take init_files to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 4c6f0ea12c4..754cd05b06a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -275,3 +275,16 @@ void __init files_defer_init(void) for_each_possible_cpu(i) fdtable_defer_list_init(i); } + +struct files_struct init_files = { + .count = ATOMIC_INIT(1), + .fdt = &init_files.fdtab, + .fdtab = { + .max_fds = NR_OPEN_DEFAULT, + .fd = &init_files.fd_array[0], + .close_on_exec = (fd_set *)&init_files.close_on_exec_init, + .open_fds = (fd_set *)&init_files.open_fds_init, + .rcu = RCU_HEAD_INIT, + }, + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), +}; -- cgit v1.2.3 From 02afc6267f6d55d47aba9fcafdbd1b7230d2294a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 19:42:56 -0400 Subject: [PATCH] dup_fd() fixes, part 1 Move the sucker to fs/file.c in preparation to the rest Signed-off-by: Al Viro --- fs/file.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 754cd05b06a..7dbadaaf00f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -261,6 +261,136 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static int count_open_files(struct fdtable *fdt) +{ + int size = fdt->max_fds; + int i; + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (fdt->open_fds->fds_bits[--i]) + break; + } + i = (i+1) * 8 * sizeof(long); + return i; +} + +static struct files_struct *alloc_files(void) +{ + struct files_struct *newf; + struct fdtable *fdt; + + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); + if (!newf) + goto out; + + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + fdt = &newf->fdtab; + fdt->max_fds = NR_OPEN_DEFAULT; + fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + fdt->open_fds = (fd_set *)&newf->open_fds_init; + fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&fdt->rcu); + fdt->next = NULL; + rcu_assign_pointer(newf->fdt, fdt); +out: + return newf; +} + +/* + * Allocate a new files structure and copy contents from the + * passed in files structure. + * errorp will be valid only when the returned files_struct is NULL. + */ +struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) +{ + struct files_struct *newf; + struct file **old_fds, **new_fds; + int open_files, size, i; + struct fdtable *old_fdt, *new_fdt; + + *errorp = -ENOMEM; + newf = alloc_files(); + if (!newf) + goto out; + + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + new_fdt = files_fdtable(newf); + open_files = count_open_files(old_fdt); + + /* + * Check whether we need to allocate a larger fd array and fd set. + * Note: we're not a clone task, so the open count won't change. + */ + if (open_files > new_fdt->max_fds) { + new_fdt->max_fds = 0; + spin_unlock(&oldf->file_lock); + spin_lock(&newf->file_lock); + *errorp = expand_files(newf, open_files-1); + spin_unlock(&newf->file_lock); + if (*errorp < 0) + goto out_release; + new_fdt = files_fdtable(newf); + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + } + + old_fds = old_fdt->fd; + new_fds = new_fdt->fd; + + memcpy(new_fdt->open_fds->fds_bits, + old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, + old_fdt->close_on_exec->fds_bits, open_files/8); + + for (i = open_files; i != 0; i--) { + struct file *f = *old_fds++; + if (f) { + get_file(f); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet + * instantiated in the files array if a sibling thread + * is partway through open(). So make sure that this + * fd is available to the new process. + */ + FD_CLR(open_files - i, new_fdt->open_fds); + } + rcu_assign_pointer(*new_fds++, f); + } + spin_unlock(&oldf->file_lock); + + /* compute the remainder to be cleared */ + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); + + /* This is long word aligned thus could use a optimized version */ + memset(new_fds, 0, size); + + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; + int start = open_files / (8 * sizeof(unsigned long)); + + memset(&new_fdt->open_fds->fds_bits[start], 0, left); + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + } + + return newf; + +out_release: + kmem_cache_free(files_cachep, newf); +out: + return NULL; +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); -- cgit v1.2.3 From 9dec3c4d306b09b31331e475e895bb9674e16d81 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:02:45 -0400 Subject: [PATCH] dup_fd() part 2 use alloc_fdtable() instead of expand_files(), get rid of pointless grabbing newf->file_lock, kill magic in copy_fdtable() that used to be there only to skip copying when called from dup_fd(). Signed-off-by: Al Viro --- fs/file.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 7dbadaaf00f..6491b2b5bc3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -119,8 +119,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) unsigned int cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); - if (ofdt->max_fds == 0) - return; cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); @@ -327,14 +325,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * Note: we're not a clone task, so the open count won't change. */ if (open_files > new_fdt->max_fds) { - new_fdt->max_fds = 0; spin_unlock(&oldf->file_lock); - spin_lock(&newf->file_lock); - *errorp = expand_files(newf, open_files-1); - spin_unlock(&newf->file_lock); - if (*errorp < 0) + + new_fdt = alloc_fdtable(open_files - 1); + if (!new_fdt) { + *errorp = -ENOMEM; + goto out_release; + } + + /* beyond sysctl_nr_open; nothing to do */ + if (unlikely(new_fdt->max_fds < open_files)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + *errorp = -EMFILE; goto out_release; - new_fdt = files_fdtable(newf); + } + rcu_assign_pointer(files->fdt, new_fdt); + /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need -- cgit v1.2.3 From afbec7fff4928c273a1f1bb14dfdfdf62688a193 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:11:17 -0400 Subject: [PATCH] dup_fd() - part 3 merge alloc_files() into dup_fd(), leave setting newf->fdt until the end Signed-off-by: Al Viro --- fs/file.c | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 6491b2b5bc3..689d2b6947e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -273,31 +273,6 @@ static int count_open_files(struct fdtable *fdt) return i; } -static struct files_struct *alloc_files(void) -{ - struct files_struct *newf; - struct fdtable *fdt; - - newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); - if (!newf) - goto out; - - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - fdt = &newf->fdtab; - fdt->max_fds = NR_OPEN_DEFAULT; - fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - fdt->open_fds = (fd_set *)&newf->open_fds_init; - fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&fdt->rcu); - fdt->next = NULL; - rcu_assign_pointer(newf->fdt, fdt); -out: - return newf; -} - /* * Allocate a new files structure and copy contents from the * passed in files structure. @@ -311,13 +286,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; - newf = alloc_files(); + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + new_fdt = &newf->fdtab; + new_fdt->max_fds = NR_OPEN_DEFAULT; + new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + new_fdt->open_fds = (fd_set *)&newf->open_fds_init; + new_fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&new_fdt->rcu); + new_fdt->next = NULL; + spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); - new_fdt = files_fdtable(newf); open_files = count_open_files(old_fdt); /* @@ -341,7 +327,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) *errorp = -EMFILE; goto out_release; } - rcu_assign_pointer(files->fdt, new_fdt); /* * Reacquire the oldf lock and a pointer to its fd table @@ -391,6 +376,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } + rcu_assign_pointer(newf->fdt, new_fdt); + return newf; out_release: -- cgit v1.2.3 From adbecb128cd2cc5d14b0ebef6d020ced0efd0ec6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:19:42 -0400 Subject: [PATCH] dup_fd() part 4 - race fix Parent _can_ be a clone task, contrary to the comment. Moreover, more files could be opened while we allocate a copy, in which case we end up copying only part into new descriptor table. Since what we get _is_ affected by all changes in the old range, we can get rather weird effects - e.g. dup2(0, 1024); close(0); in parallel with fork() resulting in child that sees the effect of close(), but not that of dup2() done just before that close(). What we need is to recalculate the open_count after having reacquired ->file_lock and if external fdtable we'd just allocated is too small for it, free the sucker and redo allocation. Signed-off-by: Al Viro --- fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 689d2b6947e..0f705c7cfef 100644 --- a/fs/file.c +++ b/fs/file.c @@ -308,11 +308,16 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) /* * Check whether we need to allocate a larger fd array and fd set. - * Note: we're not a clone task, so the open count won't change. */ - if (open_files > new_fdt->max_fds) { + while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(&oldf->file_lock); + if (new_fdt != &newf->fdtab) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + } + new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; @@ -335,6 +340,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); + open_files = count_open_files(old_fdt); } old_fds = old_fdt->fd; -- cgit v1.2.3 From eceea0b3df05ed262ae32e0c6340cc7a3626632d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 10:08:32 -0400 Subject: [PATCH] avoid multiplication overflows and signedness issues for max_fds Limit sysctl_nr_open - we don't want ->max_fds to exceed MAX_INT and we don't want size calculation for ->fd[] to overflow. Signed-off-by: Al Viro --- fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 0f705c7cfef..7b3887e054d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,6 +26,8 @@ struct fdtable_defer { }; int sysctl_nr_open __read_mostly = 1024*1024; +int sysctl_nr_open_min = BITS_PER_LONG; +int sysctl_nr_open_max = 1024 * 1024; /* raised later */ /* * We use this list to defer free fdtables that have vmalloced @@ -405,6 +407,8 @@ void __init files_defer_init(void) int i; for_each_possible_cpu(i) fdtable_defer_list_init(i); + sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; } struct files_struct init_files = { -- cgit v1.2.3 From 5f719558edf9c84bfbb1f7ad37e84c483282d09f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 May 2008 12:45:35 +0800 Subject: [Patch] fs/binfmt_elf.c: fix a wrong free In kmalloc failing path, we shouldn't free pointers in 'info', because the struct 'info' is uninitilized when kmalloc is called. And when kmalloc returns NULL, it's needless to kfree it. Signed-off-by: WANG Cong Cc: Alexander Viro Reviewed-by: Pekka Enberg -- Signed-off-by: Al Viro --- fs/binfmt_elf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b25707fee2c..bd08332079c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1900,7 +1900,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un /* alloc memory for large data structures: too large to be on stack */ elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) - goto cleanup; + goto out; segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS @@ -2034,8 +2034,9 @@ end_coredump: set_fs(fs); cleanup: - kfree(elf); free_note_info(&info); + kfree(elf); +out: return has_dumped; } -- cgit v1.2.3 From 08a6fac1c63233c87eec129938022f1a9a4d51f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 16:38:25 -0400 Subject: [PATCH] get rid of leak in compat_execve() Even though copy_compat_strings() doesn't cache the pages, copy_strings_kernel() and stuff indirectly called by e.g. ->load_binary() is doing that, so we need to drop the cache contents in the end. [found by WANG Cong ] Signed-off-by: Al Viro --- fs/compat.c | 4 ++-- fs/exec.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 332a869d2c5..ed43e17a5dc 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1405,7 +1405,7 @@ int compat_do_execve(char * filename, /* execve success */ security_bprm_free(bprm); acct_update_integrals(current); - kfree(bprm); + free_bprm(bprm); return retval; } @@ -1424,7 +1424,7 @@ out_file: } out_kfree: - kfree(bprm); + free_bprm(bprm); out_ret: return retval; diff --git a/fs/exec.c b/fs/exec.c index 1f8a24aa1f8..3c2ba7ce11d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1251,6 +1251,12 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) EXPORT_SYMBOL(search_binary_handler); +void free_bprm(struct linux_binprm *bprm) +{ + free_arg_pages(bprm); + kfree(bprm); +} + /* * sys_execve() executes a new program. */ @@ -1320,17 +1326,15 @@ int do_execve(char * filename, retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ - free_arg_pages(bprm); security_bprm_free(bprm); acct_update_integrals(current); - kfree(bprm); + free_bprm(bprm); if (displaced) put_files_struct(displaced); return retval; } out: - free_arg_pages(bprm); if (bprm->security) security_bprm_free(bprm); @@ -1344,7 +1348,7 @@ out_file: fput(bprm->file); } out_kfree: - kfree(bprm); + free_bprm(bprm); out_files: if (displaced) -- cgit v1.2.3 From 23c4971e3d97de4e1b7961ca6eacee35aa15ce5f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 8 May 2008 21:52:33 +0800 Subject: [Patch] fs/binfmt_elf.c: fix wrong return values create_elf_tables() returns 0 on success. But when strnlen_user() "fails", it returns 0 directly. So this is wrong. Signed-off-by: WANG Cong Cc: Alexander Viro Signed-off-by: Al Viro --- fs/binfmt_elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bd08332079c..0fa95b198e6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -256,7 +256,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) - return 0; + return -EINVAL; p += len; } if (__put_user(0, argv)) @@ -268,7 +268,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) - return 0; + return -EINVAL; p += len; } if (__put_user(0, envp)) -- cgit v1.2.3 From e9baf6e59842285bcf9570f5094e4c27674a0f7c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 May 2008 04:49:12 -0400 Subject: [PATCH] return to old errno choice in mkdir() et.al. In case when both EEXIST and EROFS would apply we used to return the former in mkdir(2) and friends. Lest anyone suspects us of being consistent, in the same situation knfsd gave clients nfs_erofs... ro-bind series had switched the syscall side of things to returning -EROFS and immediately broke an application - namely, mkdir -p. Patch restores the original behaviour... Signed-off-by: Al Viro --- fs/namei.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 32fd9655485..c7e43536c49 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2003,18 +2003,22 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (IS_ERR(dentry)) goto fail; + if (dentry->d_inode) + goto eexist; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. */ - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) - goto enoent; + if (unlikely(!is_dir && nd->last.name[nd->last.len])) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + } return dentry; -enoent: +eexist: dput(dentry); - dentry = ERR_PTR(-ENOENT); + dentry = ERR_PTR(-EEXIST); fail: return dentry; } -- cgit v1.2.3