diff options
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/Makefile | 1 | ||||
-rw-r--r-- | fs/nfs/callback.c | 1 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 1 | ||||
-rw-r--r-- | fs/nfs/callback_xdr.c | 2 | ||||
-rw-r--r-- | fs/nfs/delegation.c | 1 | ||||
-rw-r--r-- | fs/nfs/dir.c | 160 | ||||
-rw-r--r-- | fs/nfs/direct.c | 2 | ||||
-rw-r--r-- | fs/nfs/file.c | 48 | ||||
-rw-r--r-- | fs/nfs/idmap.c | 1 | ||||
-rw-r--r-- | fs/nfs/inode.c | 427 | ||||
-rw-r--r-- | fs/nfs/mount_clnt.c | 4 | ||||
-rw-r--r-- | fs/nfs/nfs3acl.c | 403 | ||||
-rw-r--r-- | fs/nfs/nfs3proc.c | 43 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 147 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 253 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 429 | ||||
-rw-r--r-- | fs/nfs/nfs4renewd.c | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 193 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 241 | ||||
-rw-r--r-- | fs/nfs/nfsroot.c | 9 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 142 | ||||
-rw-r--r-- | fs/nfs/proc.c | 1 | ||||
-rw-r--r-- | fs/nfs/read.c | 3 | ||||
-rw-r--r-- | fs/nfs/write.c | 108 |
24 files changed, 2087 insertions, 534 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b4baa031edf..8b3bb715d17 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,6 +8,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ proc.o read.o symlink.o unlink.o write.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 560d6175dd5..f2ca782aba3 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svcsock.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ece27e42b93..65f1e19e4d1 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include <linux/config.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d271df9df2b..7c33b9a81a9 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) @@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - rqstp->rq_res.head[0].iov_len = PAGE_SIZE; xdr_init_encode(&xdr_out, &rqstp->rq_res, p); decode_compound_hdr_arg(&xdr_in, &hdr_arg); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5b9c60f9779..d7f7eb669d0 100644 --- 
a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_xdr.h> +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ff6155f5e8d..b38a57e78a6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -32,6 +32,7 @@ #include <linux/smp_lock.h> #include <linux/namei.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 @@ -50,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, @@ -74,6 +77,27 @@ struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); @@ -90,6 +114,9 @@ struct inode_operations nfs4_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -116,7 +143,8 @@ typedef struct { struct page *page; unsigned 
long page_index; u32 *ptr; - u64 target; + u64 *dir_cookie; + loff_t current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -164,12 +192,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * throught inode->i_sem or some other mechanism. + * through inode->i_sem or some other mechanism. */ - if (page->index == 0) { - invalidate_inode_pages(inode->i_mapping); - NFS_I(inode)->readdir_timestamp = timestamp; - } + if (page->index == 0) + invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); unlock_page(page); return 0; error: @@ -202,22 +228,22 @@ void dir_page_release(nfs_readdir_descriptor_t *desc) /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry. + * to readdir, find the next entry with cookie '*desc->dir_cookie'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be * read. */ static inline -int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +int find_dirent(nfs_readdir_descriptor_t *desc) { struct nfs_entry *entry = desc->entry; int loop_count = 0, status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target) + dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -229,8 +255,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) } /* - * Find the given page, and call find_dirent() in order to try to - * return the next entry. + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->file->f_pos'. 
+ * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. */ static inline int find_dirent_page(nfs_readdir_descriptor_t *desc) @@ -253,7 +315,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - status = find_dirent(desc, page); + if (*desc->dir_cookie != 0) + status = find_dirent(desc); + else + status = find_dirent_index(desc); if (status < 0) dir_page_release(desc); out: @@ -268,7 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if possible. * - * The target for the search is 'desc->target'. 
+ * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -276,7 +342,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) int loop_count = 0; int res; - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + /* Always search-by-index from the beginning of the cache */ + if (*desc->dir_cookie == 0) { + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + for (;;) { res = find_dirent_page(desc); if (res != -EAGAIN) @@ -313,7 +388,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -333,10 +408,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, d_type); + file->f_pos, fileid, d_type); if (res < 0) break; - file->f_pos = desc->target = entry->cookie; + file->f_pos++; + *desc->dir_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -349,7 +425,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 
(unsigned long long)*desc->dir_cookie, res); return res; } @@ -375,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -391,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target; + desc->entry->prev_cookie = *desc->dir_cookie; } else status = -EIO; if (status < 0) @@ -412,8 +488,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } -/* The file offset position is now represented as a true offset into the - * page cache as is the case in most of the other filesystems. +/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. */ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -435,15 +512,15 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * filp->f_pos points to the dirent entry number. + * *desc->dir_cookie has the cookie for the next entry. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. 
*/ memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->target = filp->f_pos; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -455,9 +532,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) while(!desc->entry->eof) { res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->entry->cookie != desc->target) { + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -490,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return 0; } +loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + down(&filp->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + } +out: + up(&filp->f_dentry->d_inode->i_sem); + return offset; +} + /* * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 68df803f27c..d6a30c844de 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -517,7 +517,7 @@ retry: result = tot_bytes; out: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return result; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 55c90759249..5621ba9885f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -71,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_v3 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -116,6 +128,21 @@ nfs_file_release(struct inode *inode, struct file *filp) } /** + * nfs_revalidate_file - Revalidate the page cache & related metadata + * @inode - pointer to inode struct + * @file - pointer to file + */ +static int nfs_revalidate_file(struct inode *inode, struct file *filp) +{ + int retval = 0; + + if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + nfs_revalidate_mapping(inode, filp->f_mapping); + return 0; +} + +/** * nfs_revalidate_size - Revalidate the file size * @inode - pointer to inode struct * @file - pointer to struct file @@ -137,7 +164,8 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) goto force_reval; if (nfsi->npages != 0) return 0; - return nfs_revalidate_inode(server, inode); + if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; force_reval: return __nfs_revalidate_inode(server, inode); } @@ -198,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, 
size_t count, loff_t pos) dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_file(inode, iocb->ki_filp); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -216,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_file(inode, filp); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -232,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_file(inode, file); if (!status) status = generic_file_mmap(file, vma); return status; @@ -321,9 +349,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; + /* + * O_APPEND implies that we must revalidate the file length. 
+ */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 87f4f9aeac8..ffb8df91dc3 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -50,6 +50,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f2317f3e29f..4845911f1c6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -39,6 +39,7 @@ #include <asm/system.h> #include <asm/uaccess.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -63,6 +64,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -106,6 +108,21 @@ static struct rpc_program nfs_program = { .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -118,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync) int flags = sync ? 
FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0; @@ -140,10 +157,6 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { @@ -152,6 +165,7 @@ nfs_clear_inode(struct inode *inode) nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); + nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -161,11 +175,13 @@ nfs_clear_inode(struct inode *inode) void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); - struct rpc_clnt *rpc; + struct rpc_clnt *rpc = NFS_SB(sb)->client; /* -EIO all pending I/O */ - if ((rpc = server->client) != NULL) + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = NFS_SB(sb)->client_acl; + if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } @@ -366,13 +382,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); if (IS_ERR(xprt)) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + dprintk("%s: cannot create RPC client. 
Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); goto out_fail; } @@ -383,7 +401,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return clnt; out_fail: - xprt_destroy(xprt); return clnt; } @@ -427,21 +444,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ - if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { server->rpc_ops = &nfs_v3_clientops; server->caps |= NFS_CAP_READDIRPLUS; - if (data->version < 4) { - printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - return -EIO; - } -#else - printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - return -EIO; -#endif } else { server->rpc_ops = &nfs_v2_clientops; } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -455,17 +467,34 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + server->client_sys = rpc_clone_client(server->client); if (IS_ERR(server->client_sys)) return PTR_ERR(server->client_sys); - if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - return -ENOMEM; + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); } else { atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! 
Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; @@ -549,6 +578,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -590,9 +620,19 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} + +static void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; } /* @@ -689,7 +729,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. 
*/ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -792,7 +832,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) } } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -851,7 +891,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - init_waitqueue_head(&ctx->waitq); + ctx->dir_cookie = 0; } return ctx; } @@ -1015,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } flags = nfsi->flags; + nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). 
@@ -1022,21 +1063,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (verifier == nfsi->cache_change_attribute) nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Do the page cache invalidation */ - if (flags & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); - nfs_wb_all(inode); - } - nfsi->flags &= ~NFS_INO_INVALID_DATA; - invalidate_inode_pages2(inode->i_mapping); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); - /* This ensures we revalidate dentries */ - nfsi->cache_change_attribute++; - } + nfs_revalidate_mapping(inode, inode->i_mapping); + if (flags & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1074,6 +1103,34 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } /** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(mapping) == 0) + filemap_fdatawait(mapping); + nfs_wb_all(inode); + } + invalidate_inode_pages2(mapping); + nfsi->flags &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } +} + +/** * nfs_begin_data_update * @inode - pointer to inode * Declare that a set of operations 
will update file data on the server @@ -1106,27 +1163,6 @@ void nfs_end_data_update(struct inode *inode) } /** - * nfs_end_data_update_defer - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will defer marking the inode as needing revalidation - * unless there are no other pending updates. - */ -void nfs_end_data_update_defer(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - if (atomic_dec_and_test(&nfsi->data_updates)) { - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute ++; - } -} - -/** * nfs_refresh_inode - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes @@ -1152,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; - if (!data_unstable && nfsi->change_attr != fattr->change_attr) + if (nfsi->change_attr != fattr->change_attr) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } } if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1176,18 +1215,22 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Verify a few of the more important attributes */ - if (!data_unstable) { - if (!timespec_equal(&inode->i_mtime, &fattr->mtime) - || cur_size != new_isize) - nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) - nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } + if (cur_size != new_isize) { + 
nfsi->flags |= NFS_INO_INVALID_ATTR; + if (nfsi->npages == 0) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) @@ -1215,10 +1258,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); - __u64 new_size; - loff_t new_isize; + loff_t cur_isize, new_isize; unsigned int invalid = 0; - loff_t cur_isize; int data_unstable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -1251,61 +1292,56 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! nfs_verify_change_attribute(inode, verifier); - /* Check if the file size agrees */ - new_size = fattr->size; + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (cur_isize != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif - /* - * If we have pending writebacks, things can get - * messy. - */ - if (S_ISREG(inode->i_mode) && data_unstable) { - if (new_isize > cur_isize) { + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? 
*/ + if (verifier == nfsi->cache_change_attribute) { inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } - } else { + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. - */ + /* Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) && nfsi->change_attr != fattr->change_attr) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + if (!data_unstable) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); if ((inode->i_mode & S_IALLUGO) != 
(fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; @@ -1385,74 +1421,95 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { int error; - struct nfs_server *server; + struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - return ERR_PTR(-EINVAL); + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; + } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + s = ERR_PTR(-ENOMEM); server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + goto out_err; memset(server, 0, sizeof(struct nfs_server)); /* 
Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); - if (data->version < 2) - data->namlen = 0; - if (data->version < 3) - data->bsize = 0; - if (data->version < 4) { - data->flags &= ~NFS_MOUNT_VER3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - } - if (data->version < 5) - data->flags &= ~NFS_MOUNT_SECFLAVOUR; - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) root->size = data->root.size; else root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); if (root->size > sizeof(root->data)) { - printk("nfs_get_sb: invalid root filehandle\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; } memcpy(root->data, data->root.data, root->size); /* We now require that the mount process passes the remote address */ memcpy(&server->addr, &data->addr, sizeof(server->addr)); if (server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) { - kfree(server); - return s; + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! 
Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; } - s->s_flags = flags; + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - kfree(server); - return ERR_PTR(-EIO); - } + s->s_flags = flags; error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { @@ -1462,6 +1519,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, } s->s_flags |= MS_ACTIVE; return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; } static void nfs_kill_super(struct super_block *s) @@ -1470,10 +1532,12 @@ static void nfs_kill_super(struct super_block *s) kill_anon_super(s); - if (server->client != NULL && !IS_ERR(server->client)) + if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + if (!IS_ERR(server->client_sys)) rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ @@ -1594,15 +1658,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { - printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return -EIO; } /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen > 1) - printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } if (copy_from_user(&authflavour, data->auth_flavours, 
sizeof(authflavour))) { err = -EFAULT; goto out_fail; @@ -1610,21 +1678,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } down_write(&clp->cl_sem); - if (clp->cl_rpcclient == NULL) { + if (IS_ERR(clp->cl_rpcclient)) { xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt->cl_intr = 1; @@ -1656,21 +1725,26 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = NULL; if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - return PTR_ERR(clnt); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. 
Error = %d\n", + __FUNCTION__, err); + return err; } server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { - printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { - if (rpcauth_create(authflavour, clnt) == NULL) { - printk(KERN_WARNING "NFS: couldn't create credcache!\n"); - return -ENOMEM; + struct rpc_auth *auth; + + auth = rpcauth_create(authflavour, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); } } @@ -1730,8 +1804,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, struct nfs4_mount_data *data = raw_data; void *p; - if (!data) { - printk("nfs_read_super: missing data argument\n"); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); return ERR_PTR(-EINVAL); } @@ -1741,11 +1819,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS4_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS4_MOUNT_VERSION ? 
"older" : "newer"); - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) @@ -1773,11 +1847,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote IP address!\n"); + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); s = ERR_PTR(-EINVAL); goto out_free; } + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s) || s->s_root) @@ -1785,13 +1868,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - s = ERR_PTR(-EIO); - goto out_free; - } - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); if (error) { up_write(&s->s_umount); @@ -1875,6 +1951,13 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 9d3ddad96d9..0e82617f2de 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, clnt = rpc_create_client(xprt, hostname, &mnt_program, version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c new file mode 100644 index 00000000000..ee3536fc84a --- /dev/null +++ b/fs/nfs/nfs3acl.c @@ -0,0 +1,403 @@ +#include <linux/fs.h> +#include <linux/nfs.h> +#include <linux/nfs3.h> +#include <linux/nfs_fs.h> +#include <linux/xattr_acl.h> +#include <linux/nfsacl.h> + +#define NFSDBG_FACILITY NFSDBG_PROC + +ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return -ERANGE; +} + 
+ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return nfs3_proc_setacl(inode, type, NULL); +} + +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (!IS_ERR(nfsi->acl_access)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (!IS_ERR(nfsi->acl_default)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: 
nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EINVAL); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + goto out; + } + if (IS_ERR(acl)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); +out: + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + nfsi->acl_access = posix_acl_dup(acl); + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. 
*/ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct posix_acl *acl; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; + + dprintk("NFS call getacl\n"); + status = rpc_call(server->client_acl, ACLPROC3_GETACL, + &args, &res, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. 
*/ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + nfs3_cache_acls(inode, res.acl_access, res.acl_default); + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + int status, count; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. 
*/ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + } + + dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); + status = rpc_call(server->client_acl, ACLPROC3_SETACL, + &args, &fattr, 0); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + dprintk("NFS reply setacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, 
ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? + dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3878494dfc2..7851569b31c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include <linux/nfs_page.h> #include <linux/lockd/bind.h> #include <linux/smp_lock.h> +#include <linux/nfs_mount.h> #define NFSDBG_FACILITY NFSDBG_PROC @@ -45,7 +46,7 @@ static inline int nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) { struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[proc], + .rpc_proc = &clnt->cl_procinfo[proc], .rpc_argp = argp, .rpc_resp = resp, }; @@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; - int status; + mode_t mode = sattr->ia_mode; + int status; dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; @@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, arg.verifier[1] = current->pid; } + sattr->ia_mode &= ~current->fs->umask; + again: dir_attr.valid = 0; fattr.valid = 0; @@ -369,6 +373,9 @@ again: nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: dprintk("NFS reply create: %d\n", status); return status; @@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr 
*sattr) .fh = &fhandle, .fattr = &fattr }; - int status; + int mode = sattr->ia_mode; + int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; fattr.valid = 0; + + sattr->ia_mode &= ~current->fs->umask; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + mode_t mode = sattr->ia_mode; int status; switch (sattr->ia_mode & S_IFMT) { @@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current->fs->umask; + dir_attr.valid = 0; fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mknod: %d\n", status); return status; } @@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, - .dir_inode_ops = &nfs_dir_inode_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -856,4 +882,5 @@ 
struct nfs_rpc_ops nfs_v3_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, }; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a3593d47e5a..db4a904810a 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs_fs.h> +#include <linux/nfsacl.h> #define NFSDBG_FACILITY NFSDBG_XDR @@ -79,6 +80,11 @@ extern int nfs_stat_to_errno(int); #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) #define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + /* * Map file type to S_IFMT bits */ @@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base, len_in_head, len = nfsacl_size( + (args->mask & NFS_ACL) ? args->acl_access : NULL, + (args->mask & NFS_DFACL) ? 
args->acl_default : NULL); + int count, err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + base = (char *)p - (char *)buf->head->iov_base; + /* put as much of the acls into head as possible. */ + len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); + len -= len_in_head; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2)); + + for (count = 0; (count << PAGE_SHIFT) < len; count++) { + args->pages[count] = alloc_page(GFP_KERNEL); + if (!args->pages[count]) { + while (count) + __free_page(args->pages[--count]); + return -ENOMEM; + } + } + xdr_encode_pages(buf, args->pages, 0, len); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + /* * NFS XDR decode functions */ @@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? 
&res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return -nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + #ifndef MAX # define MAX(a, b) (((a) > (b))? (a) : (b)) #endif @@ -1021,3 +1143,28 @@ struct rpc_version nfs_version3 = { .procs = nfs3_procedures }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_timer = 1, + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_timer = 0, + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h new file mode 100644 index 00000000000..ec1a22d7b87 --- /dev/null +++ b/fs/nfs/nfs4_fs.h @@ -0,0 +1,253 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. 
+ * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. + */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. 
+ */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_sema is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. + */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct semaphore so_sema; + u32 so_seqid; /* protected by so_sema */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. 
+ * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? */ + struct semaphore lock_sema; /* Serializes file locking operations */ + spinlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); 
+extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); + +extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); 
+extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern const nfs4_stateid zero_stateid; + +/* nfs4xdr.c */ +extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +#else + +#define init_nfsv4_state(server) do { } while (0) +#define destroy_nfsv4_state(server) do { } while (0) +#define nfs4_put_state_owner(inode, owner) do { } while (0) +#define nfs4_put_open_state(state) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d5cb3e80c3..1b76f80aedb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,6 +48,7 @@ #include <linux/smp_lock.h> #include <linux/namei.h> +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; -extern nfs4_stateid zero_stateid; - /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) { @@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 @@ -124,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, BUG_ON(readdir->count < 80); if (cookie > 2) { - readdir->cookie = (cookie > 2) ? 
cookie : 0; + readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); return; } @@ -270,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta int err; do { err = _nfs4_open_reclaim(sp, state); - switch (err) { - case 0: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - return err; - } - err = nfs4_handle_exception(server, err, &exception); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -509,6 +503,20 @@ out_stale: goto out_nodeleg; } +static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_open_expired(sp, state, dentry); + if (err == -NFS4ERR_DELAY) + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -521,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta continue; get_nfs_open_context(ctx); spin_unlock(&state->inode->i_lock); - status = _nfs4_open_expired(sp, state, ctx->dentry); + status = nfs4_do_open_expired(sp, state, ctx->dentry); put_nfs_open_context(ctx); return status; } @@ -748,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, fattr->valid = 0; - if (state != NULL) + if (state != NULL) { msg.rpc_cred = state->owner->so_cred; - if (sattr->ia_valid & ATTR_SIZE) - nfs4_copy_stateid(&arg.stateid, state, NULL); - else + nfs4_copy_stateid(&arg.stateid, state, current->files); + } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); return rpc_call_sync(server->client, &msg, 0); @@ -1116,47 +1123,31 
@@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct inode * inode = dentry->d_inode; - int size_change = sattr->ia_valid & ATTR_SIZE; - struct nfs4_state *state = NULL; - int need_iput = 0; + struct rpc_cred *cred; + struct inode *inode = dentry->d_inode; + struct nfs4_state *state; int status; fattr->valid = 0; - if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + /* Search for an existing WRITE delegation first */ + state = nfs4_open_delegated(inode, FMODE_WRITE, cred); + if (!IS_ERR(state)) { + /* NB: nfs4_open_delegated() bumps the inode->i_count */ + iput(inode); + } else { + /* Search for an existing open(O_WRITE) stateid */ state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (state == NULL) { - state = nfs4_open_delegated(dentry->d_inode, - FMODE_WRITE, cred); - if (IS_ERR(state)) - state = nfs4_do_open(dentry->d_parent->d_inode, - dentry, FMODE_WRITE, - NULL, cred); - need_iput = 1; - } - put_rpccred(cred); - if (IS_ERR(state)) - return PTR_ERR(state); - - if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); - status = -EIO; - goto out; - } } + status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); -out: - if (state) { - inode = state->inode; + if (state != NULL) nfs4_close_state(state, FMODE_WRITE); - if (need_iput) - iput(inode); - } + put_rpccred(cred); return status; } @@ -1731,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name, + (unsigned long long)cookie); lock_kernel(); nfs4_setup_readdir(cookie, 
NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; @@ -1738,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); unlock_kernel(); + dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } @@ -2163,6 +2159,193 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp) return 0; } +static inline int nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. + */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +struct nfs4_cached_acl { + int cached; + size_t len; + char data[0]; +}; + +static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); +} + +static void nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_cached_acl(inode, NULL); +} + +static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) 
man page */ + if (acl->len > buflen) + goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + size_t resp_len = buflen; + void *resp_buf; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &resp_len, + }; + struct page *localpage = NULL; + int ret; + + if (buflen < PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. 
*/ + localpage = alloc_page(GFP_KERNEL); + resp_buf = page_address(localpage); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else { + resp_buf = buf; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + } + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (ret) + goto out_free; + if (resp_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, resp_len); + else + nfs4_write_cached_acl(inode, resp_buf, resp_len); + if (buf) { + ret = -ERANGE; + if (resp_len > buflen) + goto out_free; + if (localpage) + memcpy(buf, resp_buf, resp_len); + } + ret = resp_len; +out_free: + if (localpage) + __free_page(localpage); + return ret; +} + +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + if (ret == 0) + nfs4_write_cached_acl(inode, buf, buflen); + return ret; +} + static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -2448,14 +2631,11 @@ static int _nfs4_proc_getlk(struct 
nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (lsp) - nlo.id = lsp->ls_id; - else { - spin_lock(&clp->cl_lock); - nlo.id = nfs4_alloc_lockowner_id(clp); - spin_unlock(&clp->cl_lock); - } + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + lsp = request->fl_u.nfs4_fl.owner; + nlo.id = lsp->ls_id; arg.u.lockt = &nlo; status = rpc_call_sync(server->client, &msg, 0); if (!status) { @@ -2476,8 +2656,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock request->fl_pid = 0; status = 0; } - if (lsp) - nfs4_put_lock_state(lsp); +out: up(&state->lock_sema); up_read(&clp->cl_sem); return status; @@ -2537,28 +2716,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock }; struct nfs4_lock_state *lsp; struct nfs_locku_opargs luargs; - int status = 0; + int status; down_read(&clp->cl_sem); down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (!lsp) + status = nfs4_set_lock_state(state, request); + if (status != 0) goto out; + lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! 
*/ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); - } + if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_lock_seqid(status, lsp); - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(lsp->ls_stateid)); - nfs4_notify_unlck(state, request, lsp); - } - nfs4_put_lock_state(lsp); out: up(&state->lock_sema); if (status == 0) @@ -2584,7 +2761,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_lock_state *lsp; + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2606,9 +2783,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }; int status; - lsp = nfs4_get_lock_state(state, request->fl_owner); - if (lsp == NULL) - return -ENOMEM; if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { @@ -2630,38 +2804,57 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r * seqid mutating errors */ nfs4_increment_seqid(status, owner); up(&owner->so_sema); + if (status == 0) { + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + lsp->ls_seqid++; + } } else { struct nfs_exist_lock el = { .seqid = lsp->ls_seqid, }; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = &el; - largs.new_lock_owner = 0; arg.u.lock = &largs; status = 
rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); } - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); /* save the returned stateid. */ - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - if (!reclaim) - nfs4_notify_setlk(state, request, lsp); - } else if (status == -NFS4ERR_DENIED) + else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs4_put_lock_state(lsp); return status; } static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 1); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 1); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 0); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 0); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) @@ -2671,7 +2864,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); down(&state->lock_sema); - status = _nfs4_do_setlk(state, cmd, request, 0); + status = nfs4_set_lock_state(state, request); + if (status == 0) + status = _nfs4_do_setlk(state, cmd, request, 0); up(&state->lock_sema); if (status == 0) { /* Note: we always want 
to sleep here! */ @@ -2729,10 +2924,53 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (signalled()) break; } while(status < 0); - return status; } + +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, @@ -2743,10 +2981,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { .recover_lock = nfs4_lock_expired, }; +static struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops 
= &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -2777,6 +3025,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_open = nfs4_proc_file_open, .file_release = nfs4_proc_file_release, .lock = nfs4_proc_lock, + .clear_acl_cache = nfs4_zap_acl_attr, }; /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 667e06f1c64..a3001628ad3 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -53,6 +53,7 @@ #include <linux/nfs.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PROC diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 231cebce3c8..afe587d82f1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -46,24 +46,18 @@ #include <linux/workqueue.h> #include <linux/bitops.h> +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); void init_nfsv4_state(struct nfs_server *server) @@ -116,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_superblocks); init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_OK; return clp; @@ -137,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp) if (clp->cl_cred) put_rpccred(clp->cl_cred); nfs_idmap_delete(clp); - if (clp->cl_rpcclient) + if (!IS_ERR(clp->cl_rpcclient)) 
rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); nfs_callback_down(); @@ -365,7 +360,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); init_MUTEX(&state->lock_sema); - rwlock_init(&state->state_lock); + spin_lock_init(&state->state_lock); return state; } @@ -547,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) -{ - struct nfs4_lock_state *lsp; - read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); - read_unlock(&state->state_lock); - return lsp; -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -573,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f return NULL; lsp->ls_flags = 0; lsp->ls_seqid = 0; /* arbitrary */ - lsp->ls_id = -1; memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; - INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); lsp->ls_id = nfs4_alloc_lockowner_id(clp); spin_unlock(&clp->cl_lock); + INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } @@ -590,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * * The caller must be holding state->lock_sema and clp->cl_sem */ -struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { - struct nfs4_lock_state * lsp; + struct nfs4_lock_state *lsp, *new = NULL; - lsp = nfs4_find_lock_state(state, owner); - if (lsp == NULL) - lsp = nfs4_alloc_lock_state(state, owner); + for(;;) { + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, owner); + if (lsp != NULL) + break; + if (new != NULL) { + new->ls_state = state; + 
list_add(&new->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + lsp = new; + new = NULL; + break; + } + spin_unlock(&state->state_lock); + new = nfs4_alloc_lock_state(state, owner); + if (new == NULL) + return NULL; + } + spin_unlock(&state->state_lock); + kfree(new); return lsp; } /* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. + * Release reference to lock_state, and free it if we see that + * it is no longer in use */ -void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { - if (test_bit(LK_STATE_IN_USE, &state->flags)) { - struct nfs4_lock_state *lsp; + struct nfs4_state *state; - lsp = nfs4_find_lock_state(state, fl_owner); - if (lsp) { - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); - nfs4_put_lock_state(lsp); - return; - } - } - memcpy(dst, &state->stateid, sizeof(*dst)); + if (lsp == NULL) + return; + state = lsp->ls_state; + if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + return; + list_del(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + spin_unlock(&state->state_lock); + kfree(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; -} + struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; -/* -* Check to see if the request lock (type FL_UNLK) effects the fl lock. 
-* -* fl and request must have the same posix owner -* -* return: -* 0 -> fl not effected by request -* 1 -> fl consumed by request -*/ + dst->fl_u.nfs4_fl.owner = lsp; + atomic_inc(&lsp->ls_count); +} -static int -nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +static void nfs4_fl_release_lock(struct file_lock *fl) { - if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) - return 1; - return 0; + nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); } -/* - * Post an initialized lock_state on the state->lock_states list. - */ -void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +static struct file_lock_operations nfs4_fl_lock_ops = { + .fl_copy_lock = nfs4_fl_copy_lock, + .fl_release_private = nfs4_fl_release_lock, +}; + +int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) { - if (!list_empty(&lsp->ls_locks)) - return; - atomic_inc(&lsp->ls_count); - write_lock(&state->state_lock); - list_add(&lsp->ls_locks, &state->lock_states); - set_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + struct nfs4_lock_state *lsp; + + if (fl->fl_ops != NULL) + return 0; + lsp = nfs4_get_lock_state(state, fl->fl_owner); + if (lsp == NULL) + return -ENOMEM; + fl->fl_u.nfs4_fl.owner = lsp; + fl->fl_ops = &nfs4_fl_lock_ops; + return 0; } -/* - * to decide to 'reap' lock state: - * 1) search i_flock for file_locks with fl.lock_state = to ls. - * 2) determine if unlock will consume found lock. - * if so, reap - * - * else, don't reap. - * +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. 
*/ -void -nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { - struct inode *inode = state->inode; - struct file_lock *fl; + struct nfs4_lock_state *lsp; - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != lsp->ls_owner) - continue; - /* Exit if we find at least one lock which is not consumed */ - if (nfs4_check_unlock(fl,request) == 0) - return; - } + memcpy(dst, &state->stateid, sizeof(*dst)); + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + return; - write_lock(&state->state_lock); - list_del_init(&lsp->ls_locks); - if (list_empty(&state->lock_states)) - clear_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); } /* - * Release reference to lock_state, and free it if we see that - * it is no longer in use - */ -void -nfs4_put_lock_state(struct nfs4_lock_state *lsp) +* Called with state->lock_sema and clp->cl_sem held. 
+*/ +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { - if (!atomic_dec_and_test(&lsp->ls_count)) - return; - BUG_ON (!list_empty(&lsp->ls_locks)); - kfree(lsp); + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f4de05763c..6c564ef9489 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int); #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - 
nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int); #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ @@ -360,6 +365,20 @@ static int nfs_stat_to_errno(int); encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -459,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. 
*/ len = 16; @@ -660,8 +679,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fattr_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], bitmask[1] & nfs4_fattr_bitmap[1]); @@ -669,8 +686,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fsinfo_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], bitmask[1] & nfs4_fsinfo_bitmap[1]); } @@ -969,7 +984,6 @@ static int encode_putrootfh(struct xdr_stream *xdr) static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { - extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; @@ -1000,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { struct rpc_auth *auth = req->rq_task->tk_auth; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; int replen; uint32_t *p; @@ -1010,13 +1028,20 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) { - WRITE32(0); - WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID); - } else { - WRITE32(FATTR4_WORD0_FILEID); - WRITE32(0); - } + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + WRITE32(attrs[0] & readdir->bitmask[0]); + WRITE32(attrs[1] & readdir->bitmask[1]); + dprintk("%s: cookie = %Lu, verifier = 0x%x%x, 
bitmap = 0x%x%x\n", + __FUNCTION__, + (unsigned long long)readdir->cookie, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1], + attrs[0] & readdir->bitmask[0], + attrs[1] & readdir->bitmask[1]); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1025,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, readdir->pgbase, readdir->count); + dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", + __FUNCTION__, replen, readdir->pages, + readdir->pgbase, readdir->count); return 0; } @@ -1089,6 +1117,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client } static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + +static int encode_savefh(struct xdr_stream *xdr) { uint32_t *p; @@ -1632,6 +1679,34 @@ out: } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + 
args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -1697,7 +1772,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs */ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) { - extern u32 nfs4_pathconf_bitmap[2]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1718,7 +1792,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct */ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) { - extern u32 nfs4_statfs_bitmap[]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -3003,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n return status; READ_BUF(8); COPYMEM(readdir->verifier.data, 8); + dprintk("%s: verifier = 0x%x%x\n", + __FUNCTION__, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1]); + hdrlen = (char *) p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -3017,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n for (nr = 0; *p++; nr++) { if (p + 3 > end) goto short_pkt; + dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ len = ntohl(*p++); /* filename length */ if (len > NFS4_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } + dprintk("filename = %*s\n", len, (char *)p); p += XDR_QUADLEN(len); if (p + 1 > end) goto short_pkt; @@ -3042,6 +3122,7 @@ out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: + dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); entry[0] = entry[1] = 0; /* truncate listing ? 
*/ if (!nr) { @@ -3127,6 +3208,47 @@ static int decode_renew(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + *acl_len = 0; + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3418,6 +3540,71 @@ out: } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto 
out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} /* * Decode CLOSE response @@ -3895,6 +4082,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) } len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { + if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { + bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; + /* Ignore the return value of rdattr_error for now */ + p++; + len--; + } if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) xdr_decode_hyper(p, &entry->ino); else if (bitmap[0] == FATTR4_WORD0_FILEID) @@ -3934,6 +4127,8 @@ static struct { { NFS4ERR_DQUOT, EDQUOT }, { NFS4ERR_STALE, ESTALE }, { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BADOWNER, EINVAL }, + { NFS4ERR_BADNAME, EINVAL }, { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, { NFS4ERR_NOTSUPP, ENOTSUPP }, { NFS4ERR_TOOSMALL, ETOOSMALL }, @@ -4019,6 +4214,8 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index fd5bc596fe8..1b272a135a3 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -124,6 +124,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, 
Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, /* Error token */ Opt_err }; @@ -158,6 +159,8 @@ static match_table_t __initdata tokens = { {Opt_udp, "udp"}, {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf) case Opt_tcp: nfs_data.flags |= NFS_MOUNT_TCP; break; + case Opt_acl: + nfs_data.flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + nfs_data.flags |= NFS_MOUNT_NOACL; + break; default : return 0; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4f1ba723848..d53857b148e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -107,11 +107,38 @@ void nfs_unlock_request(struct nfs_page *req) smp_mb__before_clear_bit(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); - wake_up_all(&req->wb_context->waitq); + wake_up_bit(&req->wb_flags, PG_BUSY); nfs_release_request(req); } /** + * nfs_set_page_writeback_locked - Lock a request for writeback + * @req: + */ +int nfs_set_page_writeback_locked(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + if (!nfs_lock_request(req)) + return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + return 1; +} + +/** + * nfs_clear_page_writeback - Unlock request and wake up sleepers + */ +void nfs_clear_page_writeback(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + nfs_unlock_request(req); +} + +/** * nfs_clear_request - Free up all resources allocated to the request * @req: * @@ -150,34 +177,15 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } -/** - * nfs_list_add_request - Insert a request into a sorted list - * @req: request - * @head: head of list into which to insert the 
request. - * - * Note that the wb_list is sorted by page index in order to facilitate - * coalescing of requests. - * We use an insertion sort that is optimized for the case of appended - * writes. - */ -void -nfs_list_add_request(struct nfs_page *req, struct list_head *head) +static int nfs_wait_bit_interruptible(void *word) { - struct list_head *pos; + int ret = 0; -#ifdef NFS_PARANOIA - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - BUG(); - } -#endif - list_for_each_prev(pos, head) { - struct nfs_page *p = nfs_list_entry(pos); - if (p->wb_index < req->wb_index) - break; - } - list_add(&req->wb_list, pos); - req->wb_list_head = head; + if (signal_pending(current)) + ret = -ERESTARTSYS; + else + schedule(); + return ret; } /** @@ -190,12 +198,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - - if (!NFS_WBACK_BUSY(req)) - return 0; - return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + sigset_t oldmask; + int ret = 0; + + if (!test_bit(PG_BUSY, &req->wb_flags)) + goto out; + /* + * Note: the call to rpc_clnt_sigmask() suffices to ensure that we + * are not interrupted if intr flag is not set + */ + rpc_clnt_sigmask(clnt, &oldmask); + ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); +out: + return ret; } /** @@ -243,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, return npages; } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to 
scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. + * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, + NFS_PAGE_TAG_DIRTY); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + + idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } + } +out: + return res; +} + /** * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists @@ -280,7 +354,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, if (req->wb_index > idx_end) break; - if (!nfs_lock_request(req)) + if (!nfs_set_page_writeback_locked(req)) continue; nfs_list_remove_request(req); nfs_list_add_request(req, dst); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d31b4d6e5a5..cedf636bcf3 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -622,6 +622,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = 
nfs_proc_setattr, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a0042fb5863..6f866b8aa2d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, &one_request); nfs_pagein_one(&one_request, inode); return 0; @@ -185,7 +184,6 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_clear_request(req); nfs_release_request(req); - nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page) } if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, desc->head); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6f7a4af3bc4..5130eda231d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, ClearPageError(page); io_error: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return written ? 
written : result; } @@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out; } - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + err = nfs_commit_inode(inode, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; @@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfsi->req_lock); @@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); @@ -503,13 +505,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_lock(&nfsi->req_lock); next = idx_start; - while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; - if (!NFS_WBACK_BUSY(req)) - continue; + BUG_ON(!NFS_WBACK_BUSY(req)); atomic_inc(&req->wb_count); spin_unlock(&nfsi->req_lock); @@ -538,12 +539,15 @@ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); - nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if 
((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } return res; } @@ -562,11 +566,14 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + int res = 0; + + if (nfsi->ncommit != 0) { + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } return res; } #endif @@ -750,7 +757,7 @@ int nfs_updatepage(struct file *file, struct page *page, * is entirely in cache, it may be more efficient to avoid * fragmenting write requests. */ - if (PageUptodate(page) && inode->i_flock == NULL) { + if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { loff_t end_offs = i_size_read(inode) - 1; unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; @@ -821,7 +828,7 @@ out: #else nfs_inode_remove_request(req); #endif - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } static inline int flush_task_priority(int how) @@ -952,7 +959,7 @@ out_bad: nfs_writedata_free(data); } nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); return -ENOMEM; } @@ -1002,7 +1009,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1029,7 +1036,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); 
nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return error; } @@ -1121,7 +1128,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) nfs_inode_remove_request(req); #endif next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } } @@ -1210,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; - struct nfs_page *first, *last; + struct nfs_page *first; struct inode *inode; - loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - last = nfs_list_entry(data->pages.prev); inode = first->wb_context->dentry->d_inode; - /* - * Determine the offset range of requests in the COMMIT call. - * We rely on the fact that data->pages is an ordered list... - */ - start = req_offset(first); - end = req_offset(last) + last->wb_bytes; - len = end - start; - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) - len = 0; - data->inode = inode; data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1278,7 +1273,7 @@ nfs_commit_list(struct list_head *head, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1324,7 +1319,7 @@ nfs_commit_done(struct rpc_task *task) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + 
nfs_clear_page_writeback(req); res++; } sub_page_state(nr_unstable,res); @@ -1342,16 +1337,23 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); - if (res) - error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); + if (res) { + struct nfs_server *server = NFS_SERVER(inode); + + /* For single writes, FLUSH_STABLE is more efficient */ + if (res == nfsi->npages && nfsi->npages <= server->wpages) { + if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) + how |= FLUSH_STABLE; + } + error = nfs_flush_list(&head, server->wpages, how); + } if (error < 0) return error; return res; } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); @@ -1359,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start, error = 0; spin_lock(&nfsi->req_lock); - res = nfs_scan_commit(inode, &head, idx_start, npages); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); if (res) { - res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); - } else - spin_unlock(&nfsi->req_lock); - if (error < 0) - return error; + if (error < 0) + return error; + } return res; } #endif @@ -1389,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start, error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_inode(inode, idx_start, npages, how); + error = nfs_commit_inode(inode, how); #endif } while (error > 0); return error; |