From 22905f775dd6a8b73be99826dcad07ceec00244b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 16 Nov 2005 15:07:01 -0800 Subject: identify multipage ->writepages() calls NFS needs to be able to distinguish between single-page ->writepage() calls and multipage ->writepages() calls. For the single-page writepage calls NFS can kick off the I/O within the context of ->writepage(). For multipage ->writepages calls, nfs_writepage() will leave the I/O pending and nfs_writepages() will kick off the I/O when it all has been queued up within NFS. Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- include/linux/writeback.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 64a36ba43b2..b096159086e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -53,10 +53,11 @@ struct writeback_control { loff_t start; loff_t end; - unsigned nonblocking:1; /* Don't get stuck on request queues */ - unsigned encountered_congestion:1; /* An output: a queue is full */ - unsigned for_kupdate:1; /* A kupdate writeback */ - unsigned for_reclaim:1; /* Invoked from the page allocator */ + unsigned nonblocking:1; /* Don't get stuck on request queues */ + unsigned encountered_congestion:1; /* An output: a queue is full */ + unsigned for_kupdate:1; /* A kupdate writeback */ + unsigned for_reclaim:1; /* Invoked from the page allocator */ + unsigned for_writepages:1; /* This is a writepages() call */ }; /* -- cgit v1.2.3 From abbcf28f23d53e8ec56a91f3528743913fa2694a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:03 +0100 Subject: SUNRPC: Yet more RPC cleanups Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 4d77e90d0b3..4c4b2dc8aca 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -233,6 +233,7 @@ struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *, struct rpc_clnt *, rpc_action exitfunc, int flags); void rpc_release_task(struct rpc_task *); +void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); int rpc_execute(struct rpc_task *); void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, @@ -259,7 +260,7 @@ void rpc_destroy_mempool(void); static inline void rpc_exit(struct rpc_task *task, int status) { task->tk_status = status; - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } #ifdef RPC_DEBUG -- cgit v1.2.3 From 963d8fe53339128ee46a7701f2e36305f0ccff8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:04 +0100 Subject: RPC: Clean up RPC task structure Shrink the RPC task structure. Instead of storing separate pointers for task->tk_exit and task->tk_release, put them in a structure. Also pass the user data pointer as a parameter instead of passing it via task->tk_calldata. This enables us to nest callbacks. Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 2 +- include/linux/nfs_fs.h | 12 +++++++----- include/linux/sunrpc/clnt.h | 3 ++- include/linux/sunrpc/sched.h | 20 ++++++++++++++------ 4 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 16d4e5a08e1..95c8fea293b 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -172,7 +172,7 @@ extern struct nlm_host *nlm_find_client(void); /* * Server-side lock handling */ -int nlmsvc_async_call(struct nlm_rqst *, u32, rpc_action); +int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_lock *, int, struct nlm_cookie *); u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 2516adeccec..4dff705d2ff 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -406,10 +406,12 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *task); +extern void nfs_writeback_done(struct rpc_task *task, void *data); +extern void nfs_writedata_release(void *data); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern void nfs_commit_done(struct rpc_task *); +extern void nfs_commit_done(struct rpc_task *, void *data); +extern void nfs_commit_release(void *data); #endif /* @@ -481,7 +483,9 @@ static inline void nfs_writedata_free(struct nfs_write_data *p) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern void nfs_readpage_result(struct rpc_task *); +extern void nfs_readpage_result(struct rpc_task *, void *); +extern void nfs_readdata_release(void *data); + /* * Allocate and free nfs_read_data structures @@ -501,8 +505,6 @@ static inline void nfs_readdata_free(struct nfs_read_data *p) mempool_free(p, nfs_rdata_mempool); } -extern void nfs_readdata_release(struct rpc_task *task); - /* * linux/fs/nfs3proc.c */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab151bbb66d..b0ab959eca6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -126,7 +126,8 @@ int rpc_register(u32, u32, int, unsigned short, int *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags, rpc_action callback, void *clntdata); + int flags, const struct rpc_call_ops *tk_ops, + void *calldata); int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags); void rpc_restart_call(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 4c4b2dc8aca..581d8cdc3b8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -27,6 +27,7 @@ struct rpc_message { struct rpc_cred * rpc_cred; /* Credentials */ }; +struct rpc_call_ops; struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ @@ -61,13 +62,12 @@ struct rpc_task { * timeout_fn to be executed by timer bottom half * callback to be executed after waking up * action next procedure for async tasks - * exit exit async task and report to caller + * tk_ops caller callbacks */ void (*tk_timeout_fn)(struct rpc_task *); void (*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); - void (*tk_exit)(struct rpc_task *); - void (*tk_release)(struct rpc_task *); + const struct rpc_call_ops *tk_ops; void * tk_calldata; /* @@ -111,6 +111,12 @@ struct rpc_task { typedef void (*rpc_action)(struct rpc_task *); +struct rpc_call_ops { + void (*rpc_call_done)(struct rpc_task *, void *); + void (*rpc_release)(void *); +}; + + /* * RPC task flags */ @@ -228,10 +234,12 @@ struct rpc_wait_queue { /* * Function prototypes */ -struct rpc_task *rpc_new_task(struct rpc_clnt *, rpc_action, int flags); +struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, + const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); -void rpc_init_task(struct rpc_task *, struct rpc_clnt *, - rpc_action exitfunc, int flags); +void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, + int flags, const struct rpc_call_ops *ops, + void *data); void rpc_release_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); -- cgit v1.2.3 From 4ce70ada1ff1d0b80916ec9ec5764ce44a50a54f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:05 +0100 Subject: SUNRPC: Further cleanups Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 581d8cdc3b8..ac1326fc3e1 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -112,6 +112,7 @@ struct rpc_task { typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { + void (*rpc_call_prepare)(struct rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); void (*rpc_release)(void *); }; -- cgit v1.2.3 From 44c288732fdbd7e38460d156a40d29590bf93bce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:06 +0100 Subject: NFSv4: stateful NFSv4 RPC call interface The NFSv4 model requires us to complete all RPC calls that might establish state on the server whether or not the user wants to interrupt it. We may also need to schedule new work (including new RPC calls) in order to cancel the new state. The asynchronous RPC model will allow us to ensure that RPC calls always complete, but in order to allow for "synchronous" RPC, we want to add the ability to wait for completion. The waits are, of course, interruptible. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ac1326fc3e1..94b0afa4ab0 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -42,6 +42,7 @@ struct rpc_task { #ifdef RPC_DEBUG unsigned long tk_magic; /* 0xf00baa */ #endif + atomic_t tk_count; /* Reference count */ struct list_head tk_task; /* global list of tasks */ struct rpc_clnt * tk_client; /* RPC client */ struct rpc_rqst * tk_rqstp; /* RPC request */ @@ -78,7 +79,6 @@ struct rpc_task { struct timer_list tk_timer; /* kernel timer */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ - unsigned char tk_active : 1;/* Task has been activated */ unsigned char tk_priority : 2;/* Task priority */ unsigned long tk_runstate; /* Task run status */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could @@ -136,7 +136,6 @@ struct rpc_call_ops { #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_IS_ACTIVATED(t) ((t)->tk_active) #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR) @@ -145,6 +144,7 @@ struct rpc_call_ops { #define RPC_TASK_QUEUED 1 #define RPC_TASK_WAKEUP 2 #define RPC_TASK_HAS_TIMER 3 +#define RPC_TASK_ACTIVE 4 #define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) @@ -175,6 +175,15 @@ struct rpc_call_ops { smp_mb__after_clear_bit(); \ } while (0) +#define RPC_IS_ACTIVATED(t) (test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) +#define rpc_set_active(t) (set_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) +#define rpc_clear_active(t) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate); \ + smp_mb__after_clear_bit(); \ + } while(0) + /* * Task priorities. * Note: if you change these, you must also change @@ -237,6 +246,8 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, const struct rpc_call_ops *ops, void *data); +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, @@ -260,6 +271,7 @@ void * rpc_malloc(struct rpc_task *, size_t); int rpciod_up(void); void rpciod_down(void); void rpciod_wake_up(void); +int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG void rpc_show_tasks(void); #endif @@ -272,6 +284,11 @@ static inline void rpc_exit(struct rpc_task *task, int status) task->tk_action = rpc_exit_task; } +static inline int rpc_wait_for_completion_task(struct rpc_task *task) +{ + return __rpc_wait_for_completion_task(task, NULL); +} + #ifdef RPC_DEBUG static inline const char * rpc_qname(struct rpc_wait_queue *q) { -- cgit v1.2.3 From cdd4e68b5f0ed12c64b3e2be83655d2a47588a74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:12 +0100 Subject: NFSv4: Make open_confirm() asynchronous too Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 40718669b9c..518cfa5cd02 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -137,7 +137,7 @@ struct nfs_openres { */ struct nfs_open_confirmargs { const struct nfs_fh * fh; - nfs4_stateid stateid; + nfs4_stateid * stateid; struct nfs_seqid * seqid; }; -- cgit v1.2.3 From 911d1aaf26fc4d771174d98fcab710a44e2a5fa0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:16 +0100 Subject: NFSv4: locking XDR cleanup Get rid of some unnecessary intermediate structures Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 52 +++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 518cfa5cd02..b8b0eed98ec 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -165,50 +165,46 @@ struct nfs_closeres { * * Arguments to the lock,lockt, and locku call. * */ struct nfs_lowner { - __u64 clientid; - u32 id; + __u64 clientid; + u32 id; }; -struct nfs_lock_opargs { +struct nfs_lock_args { + struct nfs_fh * fh; + struct file_lock * fl; struct nfs_seqid * lock_seqid; nfs4_stateid * lock_stateid; struct nfs_seqid * open_seqid; nfs4_stateid * open_stateid; - struct nfs_lowner lock_owner; - __u32 reclaim; - __u32 new_lock_owner; + struct nfs_lowner lock_owner; + unsigned char block : 1; + unsigned char reclaim : 1; + unsigned char new_lock_owner : 1; +}; + +struct nfs_lock_res { + nfs4_stateid stateid; }; -struct nfs_locku_opargs { +struct nfs_locku_args { + struct nfs_fh * fh; + struct file_lock * fl; struct nfs_seqid * seqid; nfs4_stateid * stateid; }; -struct nfs_lockargs { - struct nfs_fh * fh; - __u32 type; - __u64 offset; - __u64 length; - union { - struct nfs_lock_opargs *lock; /* LOCK */ - struct nfs_lowner *lockt; /* LOCKT */ - struct nfs_locku_opargs *locku; /* LOCKU */ - } u; +struct nfs_locku_res { + nfs4_stateid stateid; }; -struct nfs_lock_denied { - __u64 offset; - __u64 length; - __u32 type; - struct nfs_lowner owner; +struct nfs_lockt_args { + struct nfs_fh * fh; + struct file_lock * fl; + struct nfs_lowner lock_owner; }; -struct nfs_lockres { - union { - nfs4_stateid stateid;/* LOCK success, LOCKU */ - struct nfs_lock_denied denied; /* LOCK failed, LOCKT success */ - } u; - const struct nfs_server * server; +struct nfs_lockt_res { + struct file_lock * denied; /* LOCK, LOCKT failed */ }; struct nfs4_delegreturnargs { -- cgit v1.2.3 From a911fd9a6046200e439b4af172e8379c0942eec3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 30 Nov 2005 18:08:59 -0500 Subject: NFS: simplify inlined bit ops in nfs_page.h Minor cleanup: inlined bit ops in nfs_page.h can be simpler. Test plan: Write-intensive workload against a server that requires COMMITs. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index da2e077b65e..66e2ed65852 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -79,9 +79,7 @@ extern void nfs_clear_page_writeback(struct nfs_page *req); static inline int nfs_lock_request_dontget(struct nfs_page *req) { - if (test_and_set_bit(PG_BUSY, &req->wb_flags)) - return 0; - return 1; + return !test_and_set_bit(PG_BUSY, &req->wb_flags); } /* @@ -125,9 +123,7 @@ nfs_list_remove_request(struct nfs_page *req) static inline int nfs_defer_commit(struct nfs_page *req) { - if (test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) - return 0; - return 1; + return !test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags); } static inline void @@ -141,9 +137,7 @@ nfs_clear_commit(struct nfs_page *req) static inline int nfs_defer_reschedule(struct nfs_page *req) { - if (test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags)) - return 0; - return 1; + return !test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags); } static inline void -- cgit v1.2.3 From 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 30 Nov 2005 18:09:02 -0500 Subject: NFS: support large reads and writes on the wire Most NFS server implementations allow up to 64KB reads and writes on the wire. The Solaris NFS server allows up to a megabyte, for instance. Now the Linux NFS client supports transfer sizes up to 1MB, too. This will help reduce protocol and context switch overhead on read/write intensive NFS workloads, and support larger atomic read and write operations on servers that support them. Test-plan: Connectathon and iozone on mount point with wsize=rsize>32768 over TCP. Tests with NFS over UDP to verify the maximum RPC payload size cap. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 41 +++++++++++++++++++++++++++++++++++------ include/linux/nfs_xdr.h | 29 ++++++++++++++++------------- include/linux/sunrpc/xdr.h | 5 ----- 3 files changed, 51 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4dff705d2ff..d38010ba647 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -38,9 +38,6 @@ # define NFS_DEBUG #endif -#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 -#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 - /* Default timeout values */ #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) @@ -462,18 +459,33 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) */ extern mempool_t *nfs_wdata_mempool; -static inline struct nfs_write_data *nfs_writedata_alloc(void) +static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_wdata_mempool); + p = NULL; + } + } } return p; } static inline void nfs_writedata_free(struct nfs_write_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } @@ -492,16 +504,33 @@ extern void nfs_readdata_release(void *data); */ extern mempool_t *nfs_rdata_mempool; -static inline struct nfs_read_data *nfs_readdata_alloc(void) +static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) { struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); - if (p) + + if (p) { memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } return p; } static inline void nfs_readdata_free(struct nfs_read_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b8b0eed98ec..9f422fd8767 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -4,6 +4,16 @@ #include #include +/* + * To change the maximum rsize and wsize supported by the NFS client, adjust + * NFS_MAX_FILE_IO_SIZE. 64KB is a typical maximum, but some servers can + * support a megabyte or more. The default is left at 4096 bytes, which is + * reasonable for NFS over UDP. + */ +#define NFS_MAX_FILE_IO_SIZE (1048576U) +#define NFS_DEF_FILE_IO_SIZE (4096U) +#define NFS_MIN_FILE_IO_SIZE (1024U) + struct nfs4_fsid { __u64 major; __u64 minor; @@ -215,12 +225,6 @@ struct nfs4_delegreturnargs { /* * Arguments to the read call. */ - -#define NFS_READ_MAXIOV (9U) -#if (NFS_READ_MAXIOV > (MAX_IOVEC -2)) -#error "NFS_READ_MAXIOV is too large" -#endif - struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context *context; @@ -239,11 +243,6 @@ struct nfs_readres { /* * Arguments to the write call. */ -#define NFS_WRITE_MAXIOV (9U) -#if (NFS_WRITE_MAXIOV > (MAX_IOVEC -2)) -#error "NFS_WRITE_MAXIOV is too large" -#endif - struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; @@ -674,6 +673,8 @@ struct nfs4_server_caps_res { struct nfs_page; +#define NFS_PAGEVEC_SIZE (8U) + struct nfs_read_data { int flags; struct rpc_task task; @@ -682,13 +683,14 @@ struct nfs_read_data { struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ struct nfs_page *req; /* multi ops per nfs_page */ - struct page *pagevec[NFS_READ_MAXIOV]; + struct page **pagevec; struct nfs_readargs args; struct nfs_readres res; #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif void (*complete) (struct nfs_read_data *, int); + struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; struct nfs_write_data { @@ -700,13 +702,14 @@ struct nfs_write_data { struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ struct nfs_page *req; /* multi ops per nfs_page */ - struct page *pagevec[NFS_WRITE_MAXIOV]; + struct page **pagevec; struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif void (*complete) (struct nfs_write_data *, int); + struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; struct nfs_access_entry; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5da968729cf..5676794ee34 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -134,11 +134,6 @@ xdr_adjust_iovec(struct kvec *iov, u32 *p) return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base); } -/* - * Maximum number of iov's we use. - */ -#define MAX_IOVEC (12) - /* * XDR buffer helper functions */ -- cgit v1.2.3 From 70b9ecbdb9c5fdc731f8780bffd45d9519020c4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:34 +0100 Subject: NFS: Make stat() return updated mtimes after a write() The SuS states that a call to write() will cause mtime to be updated on the file. In order to satisfy that requirement, we need to flush out any cached writes in nfs_getattr(). Speed things up slightly by not committing the writes. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d38010ba647..408d82d3d97 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -62,6 +62,7 @@ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ +#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ #ifdef __KERNEL__ -- cgit v1.2.3 From fa178f29c0f8a0dce748181a5351f4a92fd4f455 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:38 +0100 Subject: NFSv4: Ensure DELEGRETURN returns attributes Upon return of a write delegation, the server will almost always bump the change attribute. Ensure that we pick up that change so that we don't invalidate our data cache unnecessarily. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9f422fd8767..6d6f69ec567 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -220,6 +220,12 @@ struct nfs_lockt_res { struct nfs4_delegreturnargs { const struct nfs_fh *fhandle; const nfs4_stateid *stateid; + const u32 * bitmask; +}; + +struct nfs4_delegreturnres { + struct nfs_fattr * fattr; + const struct nfs_server *server; }; /* -- cgit v1.2.3 From a72b44222d222749d54b3e370d825094352e389f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:41 +0100 Subject: NFSv4: Allow user to set the port used by the NFSv4 callback channel Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 408d82d3d97..547d649b274 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -391,6 +391,17 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_ */ extern struct inode_operations nfs_symlink_inode_operations; +/* + * linux/fs/nfs/sysctl.c + */ +#ifdef CONFIG_SYSCTL +extern int nfs_register_sysctl(void); +extern void nfs_unregister_sysctl(void); +#else +#define nfs_register_sysctl() do { } while(0) +#define nfs_unregister_sysctl() do { } while(0) +#endif + /* * linux/fs/nfs/unlink.c */ -- cgit v1.2.3 From fb459f45f7c7689714023d41b3dca999bb90a5d3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 3 Jan 2006 09:55:41 +0100 Subject: SUNRPC: net/sunrpc/xdr.c: remove xdr_decode_string() This patch removes ths unused function xdr_decode_string(). Signed-off-by: Adrian Bunk Acked-by: Neil Brown Acked-by: Charles Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5676794ee34..84c35d42d25 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -91,7 +91,6 @@ struct xdr_buf { u32 * xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int len); u32 * xdr_encode_opaque(u32 *p, const void *ptr, unsigned int len); u32 * xdr_encode_string(u32 *p, const char *s); -u32 * xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen); u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen); u32 * xdr_encode_netobj(u32 *p, const struct xdr_netobj *); u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *); -- cgit v1.2.3 From 64a318ee2af9000df482d7a125c3b3e1f1007404 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jan 2006 09:55:46 +0100 Subject: NLM: Further cancel fixes If the server receives an NLM cancel call and finds no waiting lock to cancel, then chances are the lock has already been applied, and the client just hadn't yet processed the NLM granted callback before it sent the cancel. The Open Group text, for example, perimts a server to return either success (LCK_GRANTED) or failure (LCK_DENIED) in this case. But returning an error seems more helpful; the client may be able to use it to recognize that a race has occurred and to recover from the race. So, modify the relevant functions to return an error in this case. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 115e72be25d..2c9c48d6563 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -760,7 +760,7 @@ extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern void posix_block_lock(struct file_lock *, struct file_lock *); -extern void posix_unblock_lock(struct file *, struct file_lock *); +extern int posix_unblock_lock(struct file *, struct file_lock *); extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); -- cgit v1.2.3 From 02107148349f31eee7c0fb06fd7a880df73dbd20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:49 +0100 Subject: SUNRPC: switchable buffer allocation Add RPC client transport switch support for replacing buffer management on a per-transport basis. In the current IPv4 socket transport implementation, RPC buffers are allocated as needed for each RPC message that is sent. Some transport implementations may choose to use pre-allocated buffers for encoding, sending, receiving, and unmarshalling RPC messages, however. For transports capable of direct data placement, the buffers can be carved out of a pre-registered area of memory rather than from a slab cache. Test-plan: Millions of fsx operations. Performance characterization with "sio" and "iozone". Use oprofile and other tools to look for significant regression in CPU utilization. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- include/linux/sunrpc/xprt.h | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 94b0afa4ab0..8b25629accd 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -52,8 +52,6 @@ struct rpc_task { * RPC call state */ struct rpc_message tk_msg; /* RPC call info */ - __u32 * tk_buffer; /* XDR buffer */ - size_t tk_bufsize; __u8 tk_garb_retry; __u8 tk_cred_retry; @@ -268,6 +266,7 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); void * rpc_malloc(struct rpc_task *, size_t); +void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); void rpciod_wake_up(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3b8b6e823c7..7885b9621ce 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -79,21 +79,19 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; + __u32 * rq_buffer; /* XDR encode buffer */ + size_t rq_bufsize; + struct xdr_buf rq_private_buf; /* The receive buffer * used in the softirq. */ unsigned long rq_majortimeo; /* major timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ unsigned int rq_retries; /* # of retries */ - /* - * For authentication (e.g. auth_des) - */ - u32 rq_creddata[2]; /* * Partial send handling */ - u32 rq_bytes_sent; /* Bytes we have sent */ unsigned long rq_xtime; /* when transmitted */ @@ -107,6 +105,8 @@ struct rpc_xprt_ops { int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_task *task); + void * (*buf_alloc)(struct rpc_task *task, size_t size); + void (*buf_free)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_task *task); -- cgit v1.2.3 From 35f5a422ce1af836007f811b613c440d0e348e06 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:50 +0100 Subject: SUNRPC: new interface to force an RPC rebind We'd like to hide fields in rpc_xprt and rpc_clnt from upper layer protocols. Start by creating an API to force RPC rebind, replacing logic that simply sets cl_port to zero. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b0ab959eca6..3d605765f84 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -135,6 +135,7 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); +void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); static __inline__ -- cgit v1.2.3 From 922004120b10dcb0ce04b55014168e8a7a8c1a0e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:51 +0100 Subject: SUNRPC: transport switch API for setting port number At some point, transport endpoint addresses will no longer be IPv4. To hide the structure of the rpc_xprt's address field from ULPs and port mappers, add an API for setting the port number during an RPC bind operation. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 7885b9621ce..dd860128ced 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -104,6 +104,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_task *task); void * (*buf_alloc)(struct rpc_task *task, size_t size); void (*buf_free)(struct rpc_task *task); -- cgit v1.2.3 From f518e35aec984036903c1003e867f833747a9d79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:52 +0100 Subject: SUNRPC: get rid of cl_chatty Clean up: Every ULP that uses the in-kernel RPC client, except the NLM client, sets cl_chatty. There's no reason why NLM shouldn't set it, so just get rid of cl_chatty and always be verbose. Test-plan: Compile with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3d605765f84..f147e6b8433 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -49,7 +49,6 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ - cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ cl_oneshot : 1,/* dispose after use */ cl_dead : 1;/* abandoned */ -- cgit v1.2.3 From 632e3bdc5006334cea894d078660b691685e1075 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:55 +0100 Subject: SUNRPC: Ensure client closes the socket when server initiates a close If the server decides to close the RPC socket, we currently don't actually respond until either another RPC call is scheduled, or until xprt_autoclose() gets called by the socket expiry timer (which may be up to 5 minutes later). This patch ensures that xprt_autoclose() is called much sooner if the server closes the socket. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index dd860128ced..6ef99b14ff0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -254,6 +254,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_LOCKED (0) #define XPRT_CONNECTED (1) #define XPRT_CONNECTING (2) +#define XPRT_CLOSE_WAIT (3) static inline void xprt_set_connected(struct rpc_xprt *xprt) { -- cgit v1.2.3 From 58df095b732529ade8f4051b41d7c29731afecd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:57 +0100 Subject: NFSv4: Allow entries in the idmap cache to expire If someone changes the uid/gid mapping in userland, then we do eventually want those changes to be propagated to the kernel. Currently the kernel assumes that it may cache entries forever. Add an expiration time + garbage collector for idmap entries. Signed-off-by: Trond Myklebust --- include/linux/nfs_idmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index a0f1f25e0ea..102e5609429 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -71,6 +71,8 @@ int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); + +extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ #endif /* NFS_IDMAP_H */ -- cgit v1.2.3 From 9eed129bbde80cbd7ffeacaa1555ba1e0c9a0997 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jan 2006 09:56:00 +0100 Subject: SUNRPC: Update the spkm3 code to use the make_checksum interface Also update the tokenlen calculations to accomodate g_token_size(). Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_spkm3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index 0beb2cf00a8..336e218c278 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h @@ -48,7 +48,7 @@ u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struc #define CKSUMTYPE_RSA_MD5 0x0007 s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum); + int body_offset, struct xdr_netobj *cksum); void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen); -- cgit v1.2.3