From cb2c0233755429037462e16ea0d5497a0092738c Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 7 Jul 2005 17:56:03 -0700 Subject: [PATCH] export generic_drop_inode() to modules OCFS2 wants to mark an inode which has been orphaned by another node so that during final iput it takes the correct path through the VFS and can pass through the OCFS2 delete_inode callback. Since i_nlink can get out of date with other nodes, the best way I see to accomplish this is by clearing i_nlink on those inodes at drop_inode time. Other than this small amount of work, nothing different needs to happen, so I think it would be cleanest to be able to just call generic_drop_inode at the end of the OCFS2 drop_inode callback. Signed-off-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 1f9a3a2b89b..6d695037a0a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1052,7 +1052,7 @@ static void generic_forget_inode(struct inode *inode) * inode when the usage count drops to zero, and * i_nlink is zero. */ -static void generic_drop_inode(struct inode *inode) +void generic_drop_inode(struct inode *inode) { if (!inode->i_nlink) generic_delete_inode(inode); @@ -1060,6 +1060,8 @@ static void generic_drop_inode(struct inode *inode) generic_forget_inode(inode); } +EXPORT_SYMBOL_GPL(generic_drop_inode); + /* * Called when we're dropping the last reference * to an inode. -- cgit v1.2.3 From cf36680887d6d942d2119c1ff1dfb2428b0f21f4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:13 -0700 Subject: [PATCH] move ioprio syscalls into syscalls.h - Make ioprio syscalls return long, like set/getpriority syscalls. - Move function prototypes into syscalls.h so we can pick them up in the 32/64bit compat code. 
Signed-off-by: Anton Blanchard Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ioprio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ioprio.c b/fs/ioprio.c index 663e420636d..97e1f088ba0 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -43,7 +43,7 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) return 0; } -asmlinkage int sys_ioprio_set(int which, int who, int ioprio) +asmlinkage long sys_ioprio_set(int which, int who, int ioprio) { int class = IOPRIO_PRIO_CLASS(ioprio); int data = IOPRIO_PRIO_DATA(ioprio); @@ -115,7 +115,7 @@ asmlinkage int sys_ioprio_set(int which, int who, int ioprio) return ret; } -asmlinkage int sys_ioprio_get(int which, int who) +asmlinkage long sys_ioprio_get(int which, int who) { struct task_struct *g, *p; struct user_struct *user; -- cgit v1.2.3 From 3f580470baa3afc423e38fdc6e19667446b5aac0 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 7 Jul 2005 17:56:51 -0700 Subject: [PATCH] uml: restore hppfs support Some time ago a trivial patch broke HPPFS (one var became a pointer, not all uses were updated). It wasn't fixed at that time because not very used, now it's been requested so I've fixed this, and it has been tested positively (at least partially). 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hppfs/hppfs_kern.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index f8e0cbd0cb6..6f553e17c37 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -491,7 +492,7 @@ static int hppfs_open(struct inode *inode, struct file *file) fd = open_host_sock(host_file, &filter); if(fd > 0){ data->contents = hppfs_get_data(fd, filter, - &data->proc_file, + data->proc_file, file, &data->len); if(!IS_ERR(data->contents)) data->host_fd = fd; @@ -543,7 +544,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file) static loff_t hppfs_llseek(struct file *file, loff_t off, int where) { struct hppfs_private *data = file->private_data; - struct file *proc_file = &data->proc_file; + struct file *proc_file = data->proc_file; loff_t (*llseek)(struct file *, loff_t, int); loff_t ret; @@ -586,7 +587,7 @@ static int hppfs_filldir(void *d, const char *name, int size, static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) { struct hppfs_private *data = file->private_data; - struct file *proc_file = &data->proc_file; + struct file *proc_file = data->proc_file; int (*readdir)(struct file *, void *, filldir_t); struct hppfs_dirent dirent = ((struct hppfs_dirent) { .vfs_dirent = ent, -- cgit v1.2.3 From a39722034ae37f80a1803bf781fe3fe1b03e20bc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 7 Jul 2005 17:56:56 -0700 Subject: [PATCH] page_uptodate locking scalability Use a bit spin lock in the first buffer of the page to synchronise asynch IO buffer completions, instead of the global page_uptodate_lock, which is showing some scalability problems. 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 561e63a1496..6a25d7df89b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -513,8 +513,8 @@ static void free_more_memory(void) */ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) { - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; + struct buffer_head *first; struct buffer_head *tmp; struct page *page; int page_uptodate = 1; @@ -536,7 +536,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) * two buffer heads end IO at almost the same time and both * decide that the page is now completely done. */ - spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -549,7 +551,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors and they are all @@ -561,7 +564,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } @@ -572,8 +576,8 @@ still_busy: void end_buffer_async_write(struct buffer_head *bh, int uptodate) { char b[BDEVNAME_SIZE]; - static DEFINE_SPINLOCK(page_uptodate_lock); unsigned long flags; + struct buffer_head *first; struct buffer_head *tmp; struct page *page; @@ -594,7 +598,10 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) SetPageError(page); } - 
spin_lock_irqsave(&page_uptodate_lock, flags); + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + clear_buffer_async_write(bh); unlock_buffer(bh); tmp = bh->b_this_page; @@ -605,12 +612,14 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); end_page_writeback(page); return; still_busy: - spin_unlock_irqrestore(&page_uptodate_lock, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); return; } -- cgit v1.2.3 From b84c21572de8a732062eff5592e3c4b3b1793bb8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Jul 2005 17:56:57 -0700 Subject: [PATCH] acl kconfig cleanup Original patch from Matt Mackall Signed-off-by: Andreas Gruenbacher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/Kconfig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index 06217795623..aae0686a15f 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -29,6 +29,7 @@ config EXT2_FS_XATTR config EXT2_FS_POSIX_ACL bool "Ext2 POSIX Access Control Lists" depends on EXT2_FS_XATTR + select FS_POSIX_ACL help Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. @@ -114,6 +115,7 @@ config EXT3_FS_XATTR config EXT3_FS_POSIX_ACL bool "Ext3 POSIX Access Control Lists" depends on EXT3_FS_XATTR + select FS_POSIX_ACL help Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. 
@@ -241,6 +243,7 @@ config REISERFS_FS_XATTR config REISERFS_FS_POSIX_ACL bool "ReiserFS POSIX Access Control Lists" depends on REISERFS_FS_XATTR + select FS_POSIX_ACL help Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. @@ -274,6 +277,7 @@ config JFS_FS config JFS_POSIX_ACL bool "JFS POSIX Access Control Lists" depends on JFS_FS + select FS_POSIX_ACL help Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. @@ -318,8 +322,7 @@ config FS_POSIX_ACL # Never use this symbol for ifdefs. # bool - depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFSD_V4 - default y + default n source "fs/xfs/Kconfig" @@ -1438,6 +1441,7 @@ config NFSD_V4 select NFSD_TCP select CRYPTO_MD5 select CRYPTO + select FS_POSIX_ACL help If you would like to include the NFSv4 server as well as the NFSv2 and NFSv3 servers, say Y here. This feature is experimental, and -- cgit v1.2.3 From 6c036527a630720063b67d9a65455e8caca2c8fa Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 7 Jul 2005 17:56:59 -0700 Subject: [PATCH] mostly_read data section Add a new section called ".data.read_mostly" for data items that are read frequently and rarely written to like cpumaps etc. If these maps are placed in the .data section then these frequently read items may end up in cachelines with data that is frequently updated. In that case all processors in an SMP system must needlessly reload the cachelines again and again containing elements of those frequently used variables. The ability to share these cachelines will allow each cpu in an SMP system to keep local copies of those shared cachelines thereby optimizing performance. 
Signed-off-by: Alok N Kataria Signed-off-by: Shobhit Dayal Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 3a1472acc36..ca8f7a850fe 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -52,7 +52,7 @@ struct biovec_slab { */ #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] = { +static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV -- cgit v1.2.3 From ff87b37da912d6aeab6c20c58f51b34d3e37f111 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Jul 2005 17:57:00 -0700 Subject: [PATCH] ext3 xattr: Don't write to the in-inode xattr space of reserved inodes We are not using the in-inode space for xattrs in reserved inodes because mkfs.ext3 doesn't initialize it properly. For those inodes, we set i_extra_isize to 0. Make sure that we also don't overwrite the i_extra_isize field when writing out the inode in that case. This is for cleanliness only, and doesn't fix an actual bug. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 0b2db4f618c..9989fdcf4d5 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2663,7 +2663,7 @@ static int ext3_do_update_inode(handle_t *handle, } else for (block = 0; block < EXT3_N_BLOCKS; block++) raw_inode->i_block[block] = ei->i_data[block]; - if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) + if (ei->i_extra_isize) raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- cgit v1.2.3 From 682d4fc93105ebf0bdfbb04a4b85047999b17844 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 7 Jul 2005 17:57:02 -0700 Subject: [PATCH] autofs4: mistake in debug print Fix debugging printk. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/waitq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index fa2348dcd67..3df86285a1c 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -231,8 +231,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, int type = (notify == NFY_MOUNT ? autofs_ptype_missing : autofs_ptype_expire_multi); - DPRINTK(("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", - (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify)); + DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", + (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); /* autofs4_notify_daemon() may block */ autofs4_notify_daemon(sbi, wq, type); -- cgit v1.2.3 From 7eaae2828dadae3abde7f77734c874d4b74b313a Mon Sep 17 00:00:00 2001 From: "KAMBAROV, ZAUR" Date: Thu, 7 Jul 2005 17:57:06 -0700 Subject: [PATCH] coverity: fs/locks.c flp null check We're dereferencing `flp' and then we're testing it for NULLness. 
Either the compiler accidentally saved us or the existing null-pointer check is redundant. This defect was found automatically by Coverity Prevent, a static analysis tool. Signed-off-by: Zaur Kambarov Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/locks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index a0bc03495bd..29fa5da6c11 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1276,7 +1276,7 @@ int fcntl_getlease(struct file *filp) */ static int __setlease(struct file *filp, long arg, struct file_lock **flp) { - struct file_lock *fl, **before, **my_before = NULL, *lease = *flp; + struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; @@ -1287,6 +1287,8 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) if (!flp || !(*flp) || !(*flp)->fl_lmops || !(*flp)->fl_lmops->fl_break) goto out; + lease = *flp; + error = -EAGAIN; if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; -- cgit v1.2.3 From 202322e6f7cd12e82b5ff0fa92bbdf517fcf0947 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:22 -0700 Subject: [PATCH] namespace.c: fix mnt_namespace clearing This patch clears mnt_namespace on unmount. Not clearing mnt_namespace has two effects: 1) It is possible to attach a new mount to a detached mount, because check_mnt() returns true. This means, that when no other references to the detached mount remain, it still can't be freed. This causes a resource leak, and possibly un-removable modules. 2) If mnt_namespace is dereferenced (only in mark_mounts_for_expiry()) after the namespace has been freed, it can cause an Oops, memory corruption, etc. 1) has been tested before and after the patch, 2) is only speculation. 
Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 208c079e9fd..a0d0ef1f1a4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -345,6 +345,7 @@ static void umount_tree(struct vfsmount *mnt) for (p = mnt; p; p = next_mnt(p, mnt)) { list_del(&p->mnt_list); list_add(&p->mnt_list, &kill); + p->mnt_namespace = NULL; } while (!list_empty(&kill)) { @@ -1449,15 +1450,8 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { - struct vfsmount *mnt; - down_write(&namespace->sem); spin_lock(&vfsmount_lock); - - list_for_each_entry(mnt, &namespace->list, mnt_list) { - mnt->mnt_namespace = NULL; - } - umount_tree(namespace->root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem); -- cgit v1.2.3 From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry() This patch fixes a race found by Ram in mark_mounts_for_expiry() in fs/namespace.c. The bug can only be triggered with simultaneous exiting of a process having a private namespace, and expiry of a mount from within that namespace. It's practically impossible to trigger, and I haven't even tried. But still, a bug is a bug. The race happens when put_namespace() is called by another task, while mark_mounts_for_expiry() is between atomic_read() and get_namespace(). In that case get_namespace() will be called on an already dead namespace with unforeseeable results. 
The solution was suggested by Al Viro, with his own words: Instead of screwing with atomic_read() in there, why don't we simply do the following: a) atomic_dec_and_lock() in put_namespace() b) __put_namespace() called without dropping lock c) the first thing done by __put_namespace would be struct vfsmount *root = namespace->root; namespace->root = NULL; spin_unlock(...); .... umount_tree(root); ... d) check in mark_... would be simply namespace && namespace->root. And we are all set; no screwing around with atomic_read(), no magic at all. Dying namespace gets NULL ->root. All changes of ->root happen under spinlock. If under a spinlock we see non-NULL ->mnt_namespace, it won't be freed until we drop the lock (we will set ->mnt_namespace to NULL under that lock before we get to freeing namespace). If under a spinlock we see non-NULL ->mnt_namespace and ->mnt_namespace->root, we can grab a reference to namespace and be sure that it won't go away. Signed-off-by: Miklos Szeredi Acked-by: Al Viro Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index a0d0ef1f1a4..9d17541ebaf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -869,7 +869,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ namespace = mnt->mnt_namespace; - if (!namespace || atomic_read(&namespace->count) <= 0) + if (!namespace || !namespace->root) continue; get_namespace(namespace); @@ -1450,9 +1450,12 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { + struct vfsmount *root = namespace->root; + namespace->root = NULL; + spin_unlock(&vfsmount_lock); down_write(&namespace->sem); spin_lock(&vfsmount_lock); - umount_tree(namespace->root); + umount_tree(root); 
spin_unlock(&vfsmount_lock); up_write(&namespace->sem); kfree(namespace); -- cgit v1.2.3 From a4d70278610e6bebe44a7b59a469fe7391387da6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: cleanup in mark_mounts_for_expiry() This patch simplifies mark_mounts_for_expiry() by using detach_mnt() instead of duplicating everything it does. It should be an equivalent transformation except for righting the dput/mntput order. Al Viro said: "Looks sane". Signed-off-by: Miklos Szeredi Cc: Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 9d17541ebaf..ea555a36c31 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -880,24 +880,13 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* check that it is still dead: the count should now be 2 - as * contributed by the vfsmount parent and the mntget above */ if (atomic_read(&mnt->mnt_count) == 2) { - struct vfsmount *xdmnt; - struct dentry *xdentry; + struct nameidata old_nd; /* delete from the namespace */ list_del_init(&mnt->mnt_list); - list_del_init(&mnt->mnt_child); - list_del_init(&mnt->mnt_hash); - mnt->mnt_mountpoint->d_mounted--; - - xdentry = mnt->mnt_mountpoint; - mnt->mnt_mountpoint = mnt->mnt_root; - xdmnt = mnt->mnt_parent; - mnt->mnt_parent = mnt; - + detach_mnt(mnt, &old_nd); spin_unlock(&vfsmount_lock); - - mntput(xdmnt); - dput(xdentry); + path_release(&old_nd); /* now lay it to rest if this was the last ref on the * superblock */ -- cgit v1.2.3 From 24ca2af1e7cff55e71e9f86c61ddc56e894b8b40 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:25 -0700 Subject: [PATCH] namespace.c: split mark_mounts_for_expiry() This patch splits the mark_mounts_for_expiry() function. 
It's too complex and too deeply nested, even without the bugfix in the following patch. Otherwise code is completely the same. Signed-off-by: Miklos Szeredi Cc: Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 71 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index ea555a36c31..d82cf18a1a9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -825,6 +825,44 @@ unlock: EXPORT_SYMBOL_GPL(do_add_mount); +static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) +{ + spin_lock(&vfsmount_lock); + + /* + * Check that it is still dead: the count should now be 2 - as + * contributed by the vfsmount parent and the mntget above + */ + if (atomic_read(&mnt->mnt_count) == 2) { + struct nameidata old_nd; + + /* delete from the namespace */ + list_del_init(&mnt->mnt_list); + detach_mnt(mnt, &old_nd); + spin_unlock(&vfsmount_lock); + path_release(&old_nd); + + /* + * Now lay it to rest if this was the last ref on the superblock + */ + if (atomic_read(&mnt->mnt_sb->s_active) == 1) { + /* last instance - try to be smart */ + lock_kernel(); + DQUOT_OFF(mnt->mnt_sb); + acct_auto_close(mnt->mnt_sb); + unlock_kernel(); + } + mntput(mnt); + } else { + /* + * Someone brought it back to life whilst we didn't have any + * locks held so return it to the expiration list + */ + list_add_tail(&mnt->mnt_fslink, mounts); + spin_unlock(&vfsmount_lock); + } +} + /* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came @@ -875,38 +913,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) spin_unlock(&vfsmount_lock); down_write(&namespace->sem); - spin_lock(&vfsmount_lock); - - /* check that it is still dead: the count should now be 2 - as - * contributed by the vfsmount parent and the mntget above */ - 
if (atomic_read(&mnt->mnt_count) == 2) { - struct nameidata old_nd; - - /* delete from the namespace */ - list_del_init(&mnt->mnt_list); - detach_mnt(mnt, &old_nd); - spin_unlock(&vfsmount_lock); - path_release(&old_nd); - - /* now lay it to rest if this was the last ref on the - * superblock */ - if (atomic_read(&mnt->mnt_sb->s_active) == 1) { - /* last instance - try to be smart */ - lock_kernel(); - DQUOT_OFF(mnt->mnt_sb); - acct_auto_close(mnt->mnt_sb); - unlock_kernel(); - } - - mntput(mnt); - } else { - /* someone brought it back to life whilst we didn't - * have any locks held so return it to the expiration - * list */ - list_add_tail(&mnt->mnt_fslink, mounts); - spin_unlock(&vfsmount_lock); - } - + expire_mount(mnt, mounts); up_write(&namespace->sem); mntput(mnt); -- cgit v1.2.3 From ed42c879b7b1463aa7a15fdbbeb2b1914d60be8a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:26 -0700 Subject: [PATCH] namespace.c: fix expiring of detached mount This patch fixes a bug noticed by Al Viro: However, we still have a problem here - just what would happen if vfsmount is detached while we were grabbing namespace semaphore? Refcount alone is not useful here - we might be held by whoever had detached the vfsmount. IOW, we should check that it's still attached (i.e. that mnt->mnt_parent != mnt). If it's not - just leave it alone, do mntput() and let whoever holds it deal with the sucker. No need to put it back on lists. 
Signed-off-by: Miklos Szeredi Cc: Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index d82cf18a1a9..2b4635e43ae 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -829,6 +829,15 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) { spin_lock(&vfsmount_lock); + /* + * Check if mount is still attached, if not, let whoever holds it deal + * with the sucker + */ + if (mnt->mnt_parent == mnt) { + spin_unlock(&vfsmount_lock); + return; + } + /* * Check that it is still dead: the count should now be 2 - as * contributed by the vfsmount parent and the mntget above -- cgit v1.2.3 From ac0811538b40bb92d339d22364026ed91dfdd147 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:27 -0700 Subject: [PATCH] namespace.c: fix mnt_namespace zeroing for expired mounts This patch clears mnt_namespace in an expired mount. If mnt_namespace is not cleared, it's possible to attach a new mount to the already detached mount, because check_mnt() can return true. The effect is a resource leak, since the resulting tree will never be freed. 
Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 2b4635e43ae..7fd56eeb21b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -847,6 +847,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) /* delete from the namespace */ list_del_init(&mnt->mnt_list); + mnt->mnt_namespace = NULL; detach_mnt(mnt, &old_nd); spin_unlock(&vfsmount_lock); path_release(&old_nd); -- cgit v1.2.3 From 484e389c63472a7f8cfb491cf11b047364e59365 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:28 -0700 Subject: [PATCH] set mnt_namespace in the correct place This patch sets ->mnt_namespace where it's actually added to the namespace. Previously mnt_namespace was set in do_kern_mount() even if the filesystem was never added to any process's namespace (most kernel-internal filesystems). This discrepancy doesn't actually cause any problems, but it's cleaner if mnt_namespace is NULL for these non exported filesystems. 
Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 1 + fs/super.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index 7fd56eeb21b..b168dc37eaa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -808,6 +808,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; + newmnt->mnt_namespace = current->namespace; err = graft_tree(newmnt, nd); if (err == 0 && fslist) { diff --git a/fs/super.c b/fs/super.c index 25bc1ec6bc5..6e57ee252e1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -840,7 +840,6 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) mnt->mnt_root = dget(sb->s_root); mnt->mnt_mountpoint = sb->s_root; mnt->mnt_parent = mnt; - mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); free_secdata(secdata); put_filesystem(type); -- cgit v1.2.3 From 732dbef606f22a23cb3e1029d613977ec645e8ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:29 -0700 Subject: [PATCH] dcookies.c: use proper refcounting functions Dcookies shouldn't play with the internals of dentry and vfsmnt refcounting. It defeats grepping, and is prone to break if implementation details change. In addition the function doesn't even seem to be performance critical: it calls kmem_cache_alloc(). 
Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcookies.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/dcookies.c b/fs/dcookies.c index 581aac959cd..02aa0ddc582 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -94,12 +94,10 @@ static struct dcookie_struct * alloc_dcookie(struct dentry * dentry, if (!dcs) return NULL; - atomic_inc(&dentry->d_count); - atomic_inc(&vfsmnt->mnt_count); dentry->d_cookie = dcs; - dcs->dentry = dentry; - dcs->vfsmnt = vfsmnt; + dcs->dentry = dget(dentry); + dcs->vfsmnt = mntget(vfsmnt); hash_dcookie(dcs); return dcs; -- cgit v1.2.3 From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire This patch renames vfsmount->mnt_fslink to something a little more descriptive: vfsmount->mnt_expire. Signed-off-by: Mike Waychison Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index b168dc37eaa..587eb0d707e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); - INIT_LIST_HEAD(&mnt->mnt_fslink); + INIT_LIST_HEAD(&mnt->mnt_expire); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -165,8 +165,8 @@ clone_mnt(struct vfsmount *old, struct dentry *root) /* stick the duplicate mount on the same expiry list * as the original if that was on one */ spin_lock(&vfsmount_lock); - if (!list_empty(&old->mnt_fslink)) - list_add(&mnt->mnt_fslink, &old->mnt_fslink); + if 
(!list_empty(&old->mnt_expire)) + list_add(&mnt->mnt_expire, &old->mnt_expire); spin_unlock(&vfsmount_lock); } return mnt; @@ -351,7 +351,7 @@ static void umount_tree(struct vfsmount *mnt) while (!list_empty(&kill)) { mnt = list_entry(kill.next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); } else { @@ -645,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) if (mnt) { /* stop bind mounts from expiring */ spin_lock(&vfsmount_lock); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); spin_unlock(&vfsmount_lock); err = graft_tree(mnt, nd); @@ -744,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) /* if the mount is moved, it should no longer be expire * automatically */ - list_del_init(&old_nd.mnt->mnt_fslink); + list_del_init(&old_nd.mnt->mnt_expire); out2: spin_unlock(&vfsmount_lock); out1: @@ -814,7 +814,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, if (err == 0 && fslist) { /* add to the specified expiration list */ spin_lock(&vfsmount_lock); - list_add_tail(&newmnt->mnt_fslink, fslist); + list_add_tail(&newmnt->mnt_expire, fslist); spin_unlock(&vfsmount_lock); } @@ -869,7 +869,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) * Someone brought it back to life whilst we didn't have any * locks held so return it to the expiration list */ - list_add_tail(&mnt->mnt_fslink, mounts); + list_add_tail(&mnt->mnt_expire, mounts); spin_unlock(&vfsmount_lock); } } @@ -896,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ - list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { + list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { if (!xchg(&mnt->mnt_expiry_mark, 1) || 
atomic_read(&mnt->mnt_count) != 1) continue; mntget(mnt); - list_move(&mnt->mnt_fslink, &graveyard); + list_move(&mnt->mnt_expire, &graveyard); } /* @@ -912,8 +912,8 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - dispose of the corpse */ while (!list_empty(&graveyard)) { - mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); - list_del_init(&mnt->mnt_fslink); + mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); + list_del_init(&mnt->mnt_expire); /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ -- cgit v1.2.3 From 751c404b8f63e8199d5f2f8f2bcfd69b41d11caa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename _mntput to mntput_no_expire This patch renames _mntput() to something a little more descriptive: mntput_no_expire(). Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index fa8df81ce8c..1d93cb4f7c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -314,7 +314,7 @@ void path_release(struct nameidata *nd) void path_release_on_umount(struct nameidata *nd) { dput(nd->dentry); - _mntput(nd->mnt); + mntput_no_expire(nd->mnt); } /* -- cgit v1.2.3 From 463090294e1e460cf97f5ade376d4b1e62bc5263 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:10 -0700 Subject: [PATCH] nfsd4: reboot recovery fix We need to remove the recovery directory here too. (This chunk just got lost somehow in the process of commuting the reboot recovery patches past the other patches.) Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 89e36526d7f..9f9db40b566 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -874,6 +874,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi * change request correctly. */ atomic_set(&conf->cl_callback.cb_set, 0); gen_confirm(conf); + nfsd4_remove_clid_dir(unconf); expire_client(unconf); status = nfs_ok; -- cgit v1.2.3 From a6ccbbb8865101d83c2e716f08feae1da1c48584 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:11 -0700 Subject: [PATCH] nfsd4: fix sync'ing of recovery directory We need to fsync the recovery directory after writing to it, but we weren't doing this correctly. (For example, we weren't taking the i_sem when calling ->fsync().) Just reuse the existing nfsd fsync code instead. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4recover.c | 29 ++++++++--------------------- fs/nfsd/vfs.c | 2 +- 2 files changed, 9 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 095f1740f3a..bb40083b6b7 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -119,25 +119,12 @@ out: return status; } -static int -nfsd4_rec_fsync(struct dentry *dentry) +static void +nfsd4_sync_rec_dir(void) { - struct file *filp; - int status = nfs_ok; - - dprintk("NFSD: nfs4_fsync_rec_dir\n"); - filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR); - if (IS_ERR(filp)) { - status = PTR_ERR(filp); - goto out; - } - if (filp->f_op && filp->f_op->fsync) - status = filp->f_op->fsync(filp, filp->f_dentry, 0); - fput(filp); -out: - if (status) - printk("nfsd4: unable to sync recovery directory\n"); - return status; + down(&rec_dir.dentry->d_inode->i_sem); + nfsd_sync_dir(rec_dir.dentry); + up(&rec_dir.dentry->d_inode->i_sem); } int @@ -176,7 +163,7 @@ out_unlock: up(&rec_dir.dentry->d_inode->i_sem); if (status == 0) { clp->cl_firststate = 1; - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); } nfs4_reset_user(uid, gid); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); @@ -331,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); nfs4_reset_user(uid, gid); if (status == 0) - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); if (status) printk("NFSD: Failed to remove expired client state directory" " %.*s\n", HEXDIR_LEN, clp->cl_recdir); @@ -362,7 +349,7 @@ nfsd4_recdir_purge_old(void) { return; status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); if (status == 0) - status = nfsd4_rec_fsync(rec_dir.dentry); + nfsd4_sync_rec_dir(); if (status) printk("nfsd4: failed to purge old clients from recovery" " directory %s\n", 
rec_dir.dentry->d_name.name); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index be24ead89d9..5e0bf391760 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -733,7 +733,7 @@ nfsd_sync(struct file *filp) up(&inode->i_sem); } -static void +void nfsd_sync_dir(struct dentry *dp) { nfsd_dosync(NULL, dp, dp->d_inode->i_fop); -- cgit v1.2.3 From cdc5524e8a257b1c91dd8e4cdfbab979f4e17a60 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:12 -0700 Subject: [PATCH] nfsd4: lookup_one_len takes i_sem Oops, this lookup_one_len needs the i_sem. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4recover.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bb40083b6b7..53abb333732 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -289,7 +289,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); + down(&rec_dir.dentry->d_inode->i_sem); dentry = lookup_one_len(name, rec_dir.dentry, namlen); + up(&rec_dir.dentry->d_inode->i_sem); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); return status; -- cgit v1.2.3 From 67be431350941765e211eeed237c12def3aaba70 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:13 -0700 Subject: [PATCH] nfsd4: prevent multiple unlinks of recovery directories Make sure we don't try to delete client recovery directories multiple times; fixes some spurious error messages. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4recover.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 53abb333732..57ed50fe7f8 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -316,6 +316,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (!rec_dir_init || !clp->cl_firststate) return; + clp->cl_firststate = 0; nfs4_save_user(&uid, &gid); status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); nfs4_reset_user(uid, gid); -- cgit v1.2.3 From 0fa822e452084032b8495ca0d8e0199329847815 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:14 -0700 Subject: [PATCH] nfsd4: fix release_lockowner We oops in list_for_each_entry(), because release_stateowner frees something on the list we're traversing. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9f9db40b566..e388c9070de 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3084,7 +3084,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner * * of the lockowner state released; so don't release any until all * have been checked. */ status = nfs_ok; - list_for_each_entry(sop, &matches, so_perclient) { + while (!list_empty(&matches)) { + sop = list_entry(matches.next, struct nfs4_stateowner, + so_perclient); + /* unhash_stateowner deletes so_perclient only + * for openowners. 
*/ + list_del(&sop->so_perclient); release_stateowner(sop); } out: -- cgit v1.2.3 From b648330a1d741d5df8a5076b2a0a2519c69c8f41 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:15 -0700 Subject: [PATCH] nfsd4: ERR_GRACE should bump seqid on open The GRACE and NOGRACE errors should bump the sequence id on open. So we delay the handling of these errors until nfsd4_process_open2, at which point we've set the open owner, so the encode routine will be able to bump the sequence id. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4proc.c | 6 ------ fs/nfsd/nfs4state.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d71f14517b9..e08edc17c6a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -169,12 +169,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open (int)open->op_fname.len, open->op_fname.data, open->op_stateowner); - if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) - return nfserr_grace; - - if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) - return nfserr_no_grace; - /* This check required by spec. 
*/ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e388c9070de..568d5deacac 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1790,6 +1790,12 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf struct nfs4_delegation *dp = NULL; int status; + if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + + if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_no_grace; + status = nfserr_inval; if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) goto out; -- cgit v1.2.3 From 0dd395dc76071a06eea39839cc946c1241af3650 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:15 -0700 Subject: [PATCH] nfsd4: ERR_GRACE should bump seqid on lock A GRACE or NOGRACE response to a lock request should also bump the sequence id. So we delay the handling of grace period errors till after we've found the relevant owner. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 568d5deacac..92968c94c6e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2706,11 +2706,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock (long long) lock->lk_offset, (long long) lock->lk_length); - if (nfs4_in_grace() && !lock->lk_reclaim) - return nfserr_grace; - if (!nfs4_in_grace() && lock->lk_reclaim) - return nfserr_no_grace; - if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; @@ -2785,6 +2780,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock goto out; } + status = nfserr_grace; + if (nfs4_in_grace() && !lock->lk_reclaim) + goto out; + status = nfserr_no_grace; + if (!nfs4_in_grace() && lock->lk_reclaim) + goto out; + locks_init_lock(&file_lock); switch (lock->lk_type) { case NFS4_READ_LT: -- cgit v1.2.3 From 375151773125f56b7f6d798d914ea469256b330b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:16 -0700 Subject: [PATCH] nfsd4: stop overusing RECLAIM_BAD A misreading of the spec led us to convert all errors on open and lock reclaims to RECLAIM_BAD. This causes problems--for example, a reboot within the grace period could lead to reclaims with stale stateid's, and we'd like to return STALE errors in those cases. What rfc3530 actually says about RECLAIM_BAD: "The reclaim provided by the client does not match any of the server's state consistency checks and is bad." I'm assuming that "state consistency checks" refers to checks for consistency with the state recorded to stable storage, and that the error should be reserved for that case. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 92968c94c6e..142b63bc205 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1531,8 +1531,6 @@ renew: status = nfs_ok; renew_client(sop->so_client); out: - if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) - status = nfserr_reclaim_bad; return status; } @@ -1688,17 +1686,11 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta /* decrement seqid on successful reclaim, it will be bumped in encode_open */ static void -nfs4_set_claim_prev(struct nfsd4_open *open, int *status) +nfs4_set_claim_prev(struct nfsd4_open *open) { - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) { - if (*status) - *status = nfserr_reclaim_bad; - else { - open->op_stateowner->so_confirmed = 1; - open->op_stateowner->so_client->cl_firststate = 1; - open->op_stateowner->so_seqid--; - } - } + open->op_stateowner->so_confirmed = 1; + open->op_stateowner->so_client->cl_firststate = 1; + open->op_stateowner->so_seqid--; } /* @@ -1863,8 +1855,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf out: if (fp) put_nfs4_file(fp); - /* CLAIM_PREVIOUS has different error returns */ - nfs4_set_claim_prev(open, &status); + if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + nfs4_set_claim_prev(open); /* * To finish the open response, we just need to set the rflags. 
*/ @@ -2738,11 +2730,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock CHECK_FH | OPEN_STATE, &open_sop, &open_stp, &lock->v.new.clientid); - if (status) { - if (lock->lk_reclaim) - status = nfserr_reclaim_bad; + if (status) goto out; - } /* create lockowner and lock stateid */ fp = open_stp->st_file; strhashval = lock_ownerstr_hashval(fp->fi_inode, -- cgit v1.2.3 From 893f87701c9e5bd5610dfbb3f8bf1135f86d85cb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:17 -0700 Subject: [PATCH] nfsd4: comment indentation Yeah, it's trivial, but this drives me up the wall.... Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 142b63bc205..74cd9bf3e0a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2704,11 +2704,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock nfs4_lock_state(); if (lock->lk_is_new) { - /* - * Client indicates that this is a new lockowner. - * Use open owner and open stateid to create lock owner and lock - * stateid. - */ + /* + * Client indicates that this is a new lockowner. + * Use open owner and open stateid to create lock owner and + * lock stateid. + */ struct nfs4_stateid *open_stp = NULL; struct nfs4_file *fp; @@ -2842,10 +2842,10 @@ conflicting_lock: out_destroy_new_stateid: if (lock->lk_is_new) { dprintk("NFSD: nfsd4_lock: destroy new stateid!\n"); - /* - * An error encountered after instantiation of the new - * stateid has forced us to destroy it. - */ + /* + * An error encountered after instantiation of the new + * stateid has forced us to destroy it. 
+ */ if (!seqid_mutating_err(status)) open_sop->so_seqid--; -- cgit v1.2.3 From bd9aac523b812d58e644fde5e59f5697fb9e3822 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:19 -0700 Subject: [PATCH] nfsd4: fix open_reclaim seqid The sequence number we store in the sequence id is the last one we received from the client. So on the next operation we'll check that the client gives us the next higher number. We increment sequence id's at the last moment, in encode, so that we're sure of knowing the right error return. (The decision to increment the sequence id depends on the exact error returned.) However on the *first* use of a sequence number, if we set the sequence number to the one received from the client and then let the increment happen on encode, we'll be left with a sequence number one too high. For that reason, ENCODE_SEQID_OP_TAIL only increments the sequence id on *confirmed* stateowners. This creates a problem for open reclaims, which are confirmed on first use. Therefore the open reclaim code, as a special exception, *decrements* the sequence id, cancelling out the undesired increment on encode. But this prevents the sequence id from ever being incremented in the case where multiple reclaims are sent with the same openowner. Yuch! We could add another exception to the open reclaim code, decrementing the sequence id only if this is the first use of the open owner. But it's simpler by far to modify the meaning of the op_seqid field: instead of representing the previous value sent by the client, we take op_seqid, after encoding, to represent the *next* sequence id that we expect from the client. This eliminates the need for special-case handling of the first use of a stateowner. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 15 ++++++--------- fs/nfsd/nfs4xdr.c | 3 +-- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 74cd9bf3e0a..f60bcad77f7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1483,7 +1483,7 @@ nfsd4_process_open1(struct nfsd4_open *open) if (sop) { open->op_stateowner = sop; /* check for replay */ - if (open->op_seqid == sop->so_seqid){ + if (open->op_seqid == sop->so_seqid - 1){ if (sop->so_replay.rp_buflen) return NFSERR_REPLAY_ME; else { @@ -1498,7 +1498,7 @@ nfsd4_process_open1(struct nfsd4_open *open) goto renew; } } else if (sop->so_confirmed) { - if (open->op_seqid == sop->so_seqid + 1) + if (open->op_seqid == sop->so_seqid) goto renew; status = nfserr_bad_seqid; goto out; @@ -1684,13 +1684,11 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta } -/* decrement seqid on successful reclaim, it will be bumped in encode_open */ static void nfs4_set_claim_prev(struct nfsd4_open *open) { open->op_stateowner->so_confirmed = 1; open->op_stateowner->so_client->cl_firststate = 1; - open->op_stateowner->so_seqid--; } /* @@ -2234,7 +2232,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei * For the moment, we ignore the possibility of * generation number wraparound. 
*/ - if (seqid != sop->so_seqid + 1) + if (seqid != sop->so_seqid) goto check_replay; if (sop->so_confirmed) { @@ -2280,12 +2278,12 @@ no_nfs4_stateid: *sopp = sop; check_replay: - if (seqid == sop->so_seqid) { + if (seqid == sop->so_seqid - 1) { printk("NFSD: preprocess_seqid_op: retransmission?\n"); /* indicate replay to calling function */ status = NFSERR_REPLAY_ME; } else { - printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid); + printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid, seqid); *sopp = NULL; status = nfserr_bad_seqid; @@ -2608,7 +2606,6 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid, * occured. * * strhashval = lock_ownerstr_hashval - * so_seqid = lock->lk_new_lock_seqid - 1: it gets bumped in encode */ static struct nfs4_stateowner * @@ -2633,7 +2630,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str sop->so_is_open_owner = 0; sop->so_id = current_ownerid++; sop->so_client = clp; - sop->so_seqid = lock->lk_new_lock_seqid - 1; + sop->so_seqid = lock->lk_new_lock_seqid; sop->so_confirmed = 1; rp = &sop->so_replay; rp->rp_status = NFSERR_SERVERFAULT; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 91fb171d2ac..5207068cde1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1218,8 +1218,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ if (seqid_mutating_err(nfserr) && stateowner) { \ - if (stateowner->so_confirmed) \ - stateowner->so_seqid++; \ + stateowner->so_seqid++; \ stateowner->so_replay.rp_status = nfserr; \ stateowner->so_replay.rp_buflen = \ (((char *)(resp)->p - (char *)save)); \ -- cgit v1.2.3 From 7fb64cee34f5dc743f697041717cafda8a94b5ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:20 -0700 Subject: [PATCH] nfsd4: seqid comments Add some comments on the use of so_seqid, in an attempt to avoid some of the confusion 
outlined in the previous patch.... Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4xdr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5207068cde1..1515c5b8096 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1210,10 +1210,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) save = resp->p; /* - * Routine for encoding the result of a - * "seqid-mutating" NFSv4 operation. This is - * where seqids are incremented, and the - * replay cache is filled. + * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This + * is where sequence id's are incremented, and the replay cache is filled. + * Note that we increment sequence id's here, at the last moment, so we're sure + * we know whether the error to be returned is a sequence id mutating error. */ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ -- cgit v1.2.3 From e66770cd7b0c36f28a2f6eb0957c0575ac8b3787 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:21 -0700 Subject: [PATCH] nfsd4: relax new lock seqid check We're insisting that the lock sequence id field passed in the open_to_lockowner struct always be zero. This is probably thanks to the sentence in rfc3530: "The first request issued for any given lock_owner is issued with a sequence number of zero." But there doesn't seem to be any problem with allowing initial sequence numbers other than zero. And currently this is causing lock reclaims from the Linux client to fail. In the spirit of "be liberal in what you accept, conservative in what you send", we'll relax the check (and patch the Linux client as well). Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f60bcad77f7..386daac508f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2715,11 +2715,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock goto out; } - /* is the new lock seqid presented by the client zero? */ - status = nfserr_bad_seqid; - if (lock->v.new.lock_seqid != 0) - goto out; - /* validate and update open stateid and open seqid */ status = nfs4_preprocess_seqid_op(current_fh, lock->lk_new_open_seqid, -- cgit v1.2.3 From 444c2c07c2d7a6936d1381d381ab80e3f5541427 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:22 -0700 Subject: [PATCH] nfsd4: always update stateid on open An OPEN from the same client/open stateowner requires a stateid update because of the share/deny access update. Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 386daac508f..b96714ae3dd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1820,6 +1820,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_upgrade_open(rqstp, current_fh, stp, open); if (status) goto out; + update_stateid(&stp->st_stateid); } else { /* Stateid was not found, this is a new OPEN */ int flags = 0; -- cgit v1.2.3 From b700949b781480819e53bdc38a53f053226dd75e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:23 -0700 Subject: [PATCH] nfsd4: return better error on io incompatible with open mode from RFC 3530: "Share reservations are established by OPEN operations and by their nature are mandatory in that when the OPEN denies READ or WRITE operations, that denial results in such operations being rejected with error NFS4ERR_LOCKED." (Note that share_denied is really only a legal error for OPEN.) Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b96714ae3dd..3647c942915 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1295,7 +1295,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) fp = find_file(ino); if (!fp) return nfs_ok; - ret = nfserr_share_denied; + ret = nfserr_locked; /* Search for conflicting share reservations */ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { if (test_bit(deny_type, &stp->st_deny_bmap) || -- cgit v1.2.3 From 52fd004e296ac07cde820af9e3139d47dda03cf8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:24 -0700 Subject: [PATCH] nfsd4: renew lease on seqid modifying operations nfs4_preprocess_seqid_op is called by NFSv4 operations that imply an implicit renewal of the client lease. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3647c942915..7694fb8aae3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2258,7 +2258,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei printk("NFSD: preprocess_seqid_op: old stateid!\n"); goto out; } - /* XXX renew the client lease here */ + renew_client(sop->so_client); status = nfs_ok; out: -- cgit v1.2.3 From f8816512fcfde986326a2eb0f5a58e463d9904d8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:25 -0700 Subject: [PATCH] nfsd4: clarify close_lru handling The handling of close_lru in preprocess_stateid_op was a source of some confusion here recently. 
Try to make the logic a little clearer, by renaming find_openstateowner_id to make its purpose clearer and untangling some unnecessarily complicated goto's. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7694fb8aae3..67e03b5d0d8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1988,14 +1988,11 @@ laundromat_main(void *not_used) queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); } -/* search ownerid_hashtbl[] and close_lru for stateid owner - * (stateid->si_stateownerid) - */ static struct nfs4_stateowner * -find_openstateowner_id(u32 st_id, int flags) { +search_close_lru(u32 st_id, int flags) +{ struct nfs4_stateowner *local = NULL; - dprintk("NFSD: find_openstateowner_id %d\n", st_id); if (flags & CLOSE_STATE) { list_for_each_entry(local, &close_lru, so_close_lru) { if (local->so_id == st_id) @@ -2193,13 +2190,19 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei * We return BAD_STATEID if filehandle doesn't match stateid, * the confirmed flag is incorrecly set, or the generation * number is incorrect. - * If there is no entry in the openfile table for this id, - * we can't always return BAD_STATEID; - * this might be a retransmitted CLOSE which has arrived after - * the openfile has been released. 
*/ - if (!(stp = find_stateid(stateid, flags))) - goto no_nfs4_stateid; + stp = find_stateid(stateid, flags); + if (stp == NULL) { + /* + * Also, we should make sure this isn't just the result of + * a replayed close: + */ + sop = search_close_lru(stateid->si_stateownerid, flags); + if (sop == NULL) + return nfserr_bad_stateid; + *sopp = sop; + goto check_replay; + } status = nfserr_bad_stateid; @@ -2263,21 +2266,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei out: return status; - -no_nfs4_stateid: - - /* - * We determine whether this is a bad stateid or a replay, - * starting by trying to look up the stateowner. - * If stateowner is not found - stateid is bad. - */ - if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) { - printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n"); - status = nfserr_bad_stateid; - goto out; - } - *sopp = sop; - check_replay: if (seqid == sop->so_seqid - 1) { printk("NFSD: preprocess_seqid_op: retransmission?\n"); -- cgit v1.2.3 From 3a4f98bbf481cb9f755005ac569ceb5303e1b69f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:26 -0700 Subject: [PATCH] nfsd4: clean up nfs4_preprocess_seqid_op As long as we're here, do some miscellaneous cleanup. Signed-off-by: J. 
Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 56 +++++++++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 67e03b5d0d8..59b214f01b6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2165,7 +2165,6 @@ out: static int nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) { - int status; struct nfs4_stateid *stp; struct nfs4_stateowner *sop; @@ -2173,19 +2172,17 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei "stateid = (%08x/%08x/%08x/%08x)\n", seqid, stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, stateid->si_generation); - + *stpp = NULL; *sopp = NULL; - status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { printk("NFSD: preprocess_seqid_op: magic stateid!\n"); - goto out; + return nfserr_bad_stateid; } - status = nfserr_stale_stateid; if (STALE_STATEID(stateid)) - goto out; + return nfserr_stale_stateid; /* * We return BAD_STATEID if filehandle doesn't match stateid, * the confirmed flag is incorrecly set, or the generation @@ -2204,8 +2201,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei goto check_replay; } - status = nfserr_bad_stateid; - /* for new lock stateowners: * check that the lock->v.new.open_stateid * refers to an open stateowner @@ -2218,14 +2213,14 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei struct nfs4_client *clp = sop->so_client; if (!sop->so_is_open_owner) - goto out; + return nfserr_bad_stateid; if (!cmp_clid(&clp->cl_clientid, lockclid)) - goto out; + return nfserr_bad_stateid; } if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { printk("NFSD: 
preprocess_seqid_op: fh-stateid mismatch!\n"); - goto out; + return nfserr_bad_stateid; } *stpp = stp; @@ -2239,45 +2234,38 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei if (seqid != sop->so_seqid) goto check_replay; - if (sop->so_confirmed) { - if (flags & CONFIRM) { - printk("NFSD: preprocess_seqid_op: expected unconfirmed stateowner!\n"); - goto out; - } + if (sop->so_confirmed && flags & CONFIRM) { + printk("NFSD: preprocess_seqid_op: expected" + " unconfirmed stateowner!\n"); + return nfserr_bad_stateid; } - else { - if (!(flags & CONFIRM)) { - printk("NFSD: preprocess_seqid_op: stateowner not confirmed yet!\n"); - goto out; - } + if (!sop->so_confirmed && !(flags & CONFIRM)) { + printk("NFSD: preprocess_seqid_op: stateowner not" + " confirmed yet!\n"); + return nfserr_bad_stateid; } if (stateid->si_generation > stp->st_stateid.si_generation) { printk("NFSD: preprocess_seqid_op: future stateid?!\n"); - goto out; + return nfserr_bad_stateid; } - status = nfserr_old_stateid; if (stateid->si_generation < stp->st_stateid.si_generation) { printk("NFSD: preprocess_seqid_op: old stateid!\n"); - goto out; + return nfserr_old_stateid; } renew_client(sop->so_client); - status = nfs_ok; + return nfs_ok; -out: - return status; check_replay: if (seqid == sop->so_seqid - 1) { printk("NFSD: preprocess_seqid_op: retransmission?\n"); /* indicate replay to calling function */ - status = NFSERR_REPLAY_ME; - } else { - printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid, seqid); - - *sopp = NULL; - status = nfserr_bad_seqid; + return NFSERR_REPLAY_ME; } - goto out; + printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n", + sop->so_seqid, seqid); + *sopp = NULL; + return nfserr_bad_seqid; } int -- cgit v1.2.3 From 4c4cd222ee329025840bc2f8cebf71d36c62440c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:27 -0700 Subject: [PATCH] nfsd4: check lock type against openmode. 
We shouldn't be allowing, e.g., write locks on files not open for write. To enforce this, we add a pointer from the lock stateid back to the open stateid it came from, so that the check will continue to be correct even after the open is upgraded or downgraded. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59b214f01b6..b83f8fb441e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1160,6 +1160,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_deny_bmap = 0; __set_bit(open->op_share_access, &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); + stp->st_openstp = NULL; } static void @@ -2158,12 +2159,18 @@ out: return status; } +static inline int +setlkflg (int type) +{ + return (type == NFS4_READW_LT || type == NFS4_READ_LT) ? + RD_STATE : WR_STATE; +} /* * Checks for sequence id mutating operations.
*/ static int -nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) +nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) { struct nfs4_stateid *stp; struct nfs4_stateowner *sop; @@ -2201,21 +2208,31 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei goto check_replay; } - /* for new lock stateowners: - * check that the lock->v.new.open_stateid - * refers to an open stateowner - * - * check that the lockclid (nfs4_lock->v.new.clientid) is the same - * as the open_stateid->st_stateowner->so_client->clientid - */ - if (lockclid) { + if (lock) { struct nfs4_stateowner *sop = stp->st_stateowner; + clientid_t *lockclid = &lock->v.new.clientid; struct nfs4_client *clp = sop->so_client; + int lkflg = 0; + int status; + + lkflg = setlkflg(lock->lk_type); + + if (lock->lk_is_new) { + if (!sop->so_is_open_owner) + return nfserr_bad_stateid; + if (!cmp_clid(&clp->cl_clientid, lockclid)) + return nfserr_bad_stateid; + /* stp is the open stateid */ + status = nfs4_check_openmode(stp, lkflg); + if (status) + return status; + } else { + /* stp is the lock stateid */ + status = nfs4_check_openmode(stp->st_openstp, lkflg); + if (status) + return status; + } - if (!sop->so_is_open_owner) - return nfserr_bad_stateid; - if (!cmp_clid(&clp->cl_clientid, lockclid)) - return nfserr_bad_stateid; } if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { @@ -2642,6 +2659,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? 
*/ stp->st_access_bmap = open_stp->st_access_bmap; stp->st_deny_bmap = open_stp->st_deny_bmap; + stp->st_openstp = open_stp; out: return stp; @@ -2697,8 +2715,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock lock->lk_new_open_seqid, &lock->lk_new_open_stateid, CHECK_FH | OPEN_STATE, - &open_sop, &open_stp, - &lock->v.new.clientid); + &open_sop, &open_stp, lock); if (status) goto out; /* create lockowner and lock stateid */ @@ -2726,7 +2743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, CHECK_FH | LOCK_STATE, - &lock->lk_stateowner, &lock_stp, NULL); + &lock->lk_stateowner, &lock_stp, lock); if (status) goto out; } -- cgit v1.2.3 From e34ac862ee6644378bfe6ea65c2e0dda4545513d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:30 -0700 Subject: [PATCH] nfsd4: fix fh_expire_type After discussion at the recent NFSv4 bake-a-thon, I realized that my assumption that NFS4_FH_PERSISTENT required filehandles to persist was a misreading of the spec. This also fixes an interoperability problem with the Solaris client. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1515c5b8096..4c414635023 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1366,9 +1366,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if ((buflen -= 4) < 0) goto out_resource; if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) - WRITE32(NFS4_FH_VOLATILE_ANY); + WRITE32(NFS4_FH_PERSISTENT); else - WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME); + WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { /* -- cgit v1.2.3