From b92dccf65bab3b6b7deb79ff3321dc256eb0f53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:03 -0500 Subject: NFS: Fix a busy inodes issue... The nfs_open_context may live longer than the file descriptor that spawned it, so it needs to carry a reference to the vfsmount. If not, then generic_shutdown_super() may end up being called before reads and writes have been flushed out. Make a couple of functions static while we're at it... Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b4dc6e2e10c..1161725d75e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -78,6 +78,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { atomic_t count; + struct vfsmount *vfsmnt; struct dentry *dentry; struct rpc_cred *cred; struct nfs4_state *state; @@ -311,12 +312,9 @@ extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); -extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); -extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); -extern void nfs_file_clear_open_context(struct file *filp); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern u32 root_nfs_parse_addr(char *name); /*__init*/ -- cgit v1.2.3 From 7bab377fcb495ee2e5a1cd69d235f8d84c76e3af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:06 -0500 Subject: lockd: Don't expose the process pid to the NLM server Instead we use the nlm_lockowner->pid. Signed-off-by: Trond Myklebust --- include/linux/lockd/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d7a5cc4cfa9..bb0a0f1caa9 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -28,6 +28,7 @@ struct nlm_lock { int len; /* length of "caller" */ struct nfs_fh fh; struct xdr_netobj oh; + u32 svid; struct file_lock fl; }; -- cgit v1.2.3 From 24c5d9d7ea5a64fb5f157d17aa2c67a3300f8a08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:08 -0500 Subject: SUNRPC: Run rpci->queue_timeout on the rpciod workqueue instead of generic Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8b25629accd..a390c9b8a01 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -276,6 +276,7 @@ void rpc_show_tasks(void); #endif int rpc_init_mempool(void); void rpc_destroy_mempool(void); +extern struct workqueue_struct *rpciod_workqueue; static inline void rpc_exit(struct rpc_task *task, int status) { -- cgit v1.2.3 From 24bd68f46b1ad08d69bf32779f860df867780a7a Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Mon, 20 Mar 2006 13:44:11 -0500 Subject: NFS: Code comments update in NFS read_cache_mtime is no longer used in nfs_inode. This patch removes references of read_cache_mtime in the code comments. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1161725d75e..b71da4d4b13 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -119,8 +119,7 @@ struct nfs_inode { unsigned long cache_validity; /* bit mask */ /* - * read_cache_jiffies is when we started read-caching this inode, - * and read_cache_mtime is the mtime of the inode at that time. + * read_cache_jiffies is when we started read-caching this inode. * attrtimeo is for how long the cached information is assumed * to be valid. A successful attribute revalidation doubles * attrtimeo (up to acregmax/acdirmax), a failure resets it to @@ -129,11 +128,6 @@ struct nfs_inode { * We need to revalidate the cached attrs for this inode if * * jiffies - read_cache_jiffies > attrtimeo - * - * and invalidate any cached data/flush out any dirty pages if - * we find that - * - * mtime != read_cache_mtime */ unsigned long read_cache_jiffies; unsigned long attrtimeo; -- cgit v1.2.3 From b4629fe2f094b719847f31be1ee5ab38300038b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: VFS: New /proc file /proc/self/mountstats Create a new file under /proc/self, called mountstats, where mounted file systems can export information (configuration options, performance counters, and so on). Use a mechanism similar to /proc/mounts and s_ops->show_options. This mechanism does not violate namespace security, and is safe to use while other processes are unmounting file systems. Thanks to Mike Waychison for his review and comments. Test-plan: Test concurrent mount/unmount operations while cat'ing /proc/self/mountstats. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 128d0082522..be21e860a9f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1086,6 +1086,7 @@ struct super_operations { void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_stats)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); -- cgit v1.2.3 From 7a480e250c7ca9187275d8574ae9e48a6b602cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:12 -0500 Subject: NFS: show retransmit settings when displaying mount options Sometimes it's important to know the exact RPC retransmit settings the kernel is using for an NFS mount point. Add this facility to the NFS client's show_options method. Test plan: Set various retransmit settings via the mount command, and check that the settings are reflected in /proc/mounts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3d3a305488c..a522ab97358 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -26,6 +26,8 @@ struct nfs_server { unsigned int acregmax; unsigned int acdirmin; unsigned int acdirmax; + unsigned long retrans_timeo; /* retransmit timeout */ + unsigned int retrans_count; /* number of retransmit tries */ unsigned int namelen; char * hostname; /* remote hostname */ struct nfs_fh fh; -- cgit v1.2.3 From d9ef5a8c26aab09762afce43df64736720b4860e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:13 -0500 Subject: NFS: introduce mechanism for tracking NFS client metrics Add a per-superblock performance counter facility to the NFS client. This facility mimics the counters available for block devices and for networking. Expose these new counters via the new /proc/self/mountstats interface. Thanks to Andrew Morton and Trond Myklebust for their review and comments. Test plan: fsx and iozone on UP and SMP systems, with and without pre-emption. Watch for memory overwrite bugs, and performance loss (significantly more CPU required per op). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a522ab97358..d65e69a06b7 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -4,6 +4,8 @@ #include #include +struct nfs_iostats; + /* * NFS client parameters stored in the superblock. */ @@ -12,6 +14,7 @@ struct nfs_server { struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ + struct nfs_iostats * io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From 67ec9f46b889bfb1ab0a4e307d53929d5f0692bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: NFS: report how long an NFS file system has been mounted Add a field in nfs_server to record a timestamp when a mount succeeds. Report the number of seconds the file system has been mounted via nfs_show_stats(). Test plan: Mount an NFS file system, watch the mountstats reports and compare with clock time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index d65e69a06b7..65dec21af77 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -35,6 +35,7 @@ struct nfs_server { char * hostname; /* remote hostname */ struct nfs_fh fh; struct sockaddr_in addr; + unsigned long mount_time; /* when this fs was mounted */ #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. -- cgit v1.2.3 From e19b63dafdf7d615b0d36b90990a07e7792b9d3a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:15 -0500 Subject: SUNRPC: track length of RPC wait queues RPC wait queue length will eventually be exported to userland via the RPC iostats interface. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index a390c9b8a01..6c23f73a799 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -203,6 +203,7 @@ struct rpc_wait_queue { unsigned char priority; /* current priority */ unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ + unsigned short qlen; /* total # tasks waiting in queue */ #ifdef RPC_DEBUG const char * name; #endif -- cgit v1.2.3 From 262ca07de4d7f1bff20361c1353bb14b3607afb2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:16 -0500 Subject: SUNRPC: add a handful of per-xprt counters Monitor generic transport events. Add a transport switch callout to format transport counters for export to user-land. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6ef99b14ff0..7eebbab7160 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -114,6 +114,7 @@ struct rpc_xprt_ops { void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); + void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); }; struct rpc_xprt { @@ -187,6 +188,18 @@ struct rpc_xprt { struct list_head recv; + struct { + unsigned long bind_count, /* total number of binds */ + connect_count, /* total number of connects */ + connect_start, /* connect start timestamp */ + connect_time, /* jiffies waiting for connect */ + sends, /* how many complete requests */ + recvs, /* how many complete requests */ + bad_xids; /* lookup_rqst didn't find XID */ + + unsigned long long req_u, /* average requests on the wire */ + bklog_u; /* backlog queue utilization */ + } stat; void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); -- cgit v1.2.3 From ef759a2e54ed434b2f72b52a14edecd6d4eadf74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:17 -0500 Subject: SUNRPC: introduce per-task RPC iostats Account for various things that occur while an RPC task is executed. Separate timers for RPC round trip and RPC execution time show how long RPC requests wait in queue before being sent. Eventually these will be accumulated at xprt_release time in one place where they can be viewed from userland. Test plan: Compile kernel with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6c23f73a799..45a64ae963e 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -86,6 +86,12 @@ struct rpc_task { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ } u; + + unsigned short tk_timeouts; /* maj timeouts */ + size_t tk_bytes_sent; /* total bytes sent */ + unsigned long tk_start; /* RPC task init timestamp */ + long tk_rtt; /* round-trip time (jiffies) */ + #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif -- cgit v1.2.3 From 11c556b3d8d481829ab5f9933a25d29b00913b5a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: SUNRPC: provide a mechanism for collecting stats in the RPC client Add a simple mechanism for collecting stats in the RPC client. Stats are tabulated during xprt_release. Note that per_cpu shenanigans are not required here because the RPC client already serializes on the transport write lock. Test plan: Compile kernel with CONFIG_NFS enabled. Basic performance regression testing with high-speed networking and high performance server. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 3 +- include/linux/sunrpc/metrics.h | 77 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 include/linux/sunrpc/metrics.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index f147e6b8433..0f3662002ff 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -45,7 +45,8 @@ struct rpc_clnt { char * cl_server; /* server machine name */ char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ - struct rpc_stat * cl_stats; /* statistics */ + struct rpc_stat * cl_stats; /* per-program statistics */ + struct rpc_iostats * cl_metrics; /* per-client statistics */ unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h new file mode 100644 index 00000000000..8f96e9dc369 --- /dev/null +++ b/include/linux/sunrpc/metrics.h @@ -0,0 +1,77 @@ +/* + * linux/include/linux/sunrpc/metrics.h + * + * Declarations for RPC client per-operation metrics + * + * Copyright (C) 2005 Chuck Lever + * + * RPC client per-operation statistics provide latency and retry + * information about each type of RPC procedure in a given RPC program. + * These statistics are not for detailed problem diagnosis, but simply + * to indicate whether the problem is local or remote. + * + * These counters are not meant to be human-readable, but are meant to be + * integrated into system monitoring tools such as "sar" and "iostat". As + * such, the counters are sampled by the tools over time, and are never + * zeroed after a file system is mounted. Moving averages can be computed + * by the tools by taking the difference between two instantaneous samples + * and dividing that by the time between the samples. + * + * The counters are maintained in a single array per RPC client, indexed + * by procedure number. There is no need to maintain separate counter + * arrays per-CPU because these counters are always modified behind locks. + */ + +#ifndef _LINUX_SUNRPC_METRICS_H +#define _LINUX_SUNRPC_METRICS_H + +#include + +#define RPC_IOSTATS_VERS "1.0" + +struct rpc_iostats { + /* + * These counters give an idea about how many request + * transmissions are required, on average, to complete that + * particular procedure. Some procedures may require more + * than one transmission because the server is unresponsive, + * the client is retransmitting too aggressively, or the + * requests are large and the network is congested. + */ + unsigned long om_ops, /* count of operations */ + om_ntrans, /* count of RPC transmissions */ + om_timeouts; /* count of major timeouts */ + + /* + * These count how many bytes are sent and received for a + * given RPC procedure type. This indicates how much load a + * particular procedure is putting on the network. These + * counts include the RPC and ULP headers, and the request + * payload. + */ + unsigned long long om_bytes_sent, /* count of bytes out */ + om_bytes_recv; /* count of bytes in */ + + /* + * The length of time an RPC request waits in queue before + * transmission, the network + server latency of the request, + * and the total time the request spent from init to release + * are measured. + */ + unsigned long long om_queue, /* jiffies queued for xmit */ + om_rtt, /* jiffies for RPC RTT */ + om_execute; /* jiffies for RPC execution */ +} ____cacheline_aligned; + +struct rpc_task; +struct rpc_clnt; + +/* + * EXPORTed functions for managing rpc_iostats structures + */ +struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); +void rpc_count_iostats(struct rpc_task *); +void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); +void rpc_free_iostats(struct rpc_iostats *); + +#endif /* _LINUX_SUNRPC_METRICS_H */ -- cgit v1.2.3 From cc0175c1dc1de8f6af0eb0631dcc5b999a6fcc42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:22 -0500 Subject: SUNRPC: display human-readable procedure name in rpc_iostats output Add fields to the rpc_procinfo struct that allow the display of a human-readable name for each procedure in the rpc_iostats output. Also fix it so that the NFSv4 stats are broken up correctly by sub-procedure number. NFSv4 uses only two real RPC procedures: NULL, and COMPOUND. Test plan: Mount with NFSv2, NFSv3, and NFSv4, and do "cat /proc/self/mountstats". Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0f3662002ff..3bec751ee24 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -101,6 +101,8 @@ struct rpc_procinfo { unsigned int p_bufsiz; /* req. buffer size */ unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ + u32 p_statidx; /* Which procedure to account */ + char * p_name; /* name of procedure */ }; #define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) -- cgit v1.2.3 From dead28da8e3fb32601d38fb32b7021122e0a3d21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:23 -0500 Subject: SUNRPC: eliminate rpc_call() Clean-up: replace rpc_call() helper with direct call to rpc_call_sync. This makes NFSv2 and NFSv3 synchronous calls more computationally efficient, and reduces stack consumption in functions that used to invoke rpc_call more than once. Test plan: Compile kernel with CONFIG_NFS enabled. Connectathon on NFS version 2, version 3, and version 4 mount points. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 14 -------------- include/linux/sunrpc/sched.h | 1 - 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3bec751ee24..e37c06128e5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -140,20 +140,6 @@ size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); -static __inline__ -int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) -{ - struct rpc_message msg = { - .rpc_proc = &clnt->cl_procinfo[proc], - .rpc_argp = argp, - .rpc_resp = resp, - .rpc_cred = NULL - }; - return rpc_call_sync(clnt, &msg, flags); -} - -extern void rpciod_wake_up(void); - /* * Helper function for NFSroot support */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 45a64ae963e..82a91bb2236 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -276,7 +276,6 @@ void * rpc_malloc(struct rpc_task *, size_t); void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -void rpciod_wake_up(void); int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG void rpc_show_tasks(void); -- cgit v1.2.3 From 2e0af86f618c697b44e2d67dff151256c58201c4 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks: remove unused posix_block_lock posix_lock_file() is used to add a blocked lock to Lockd's block, so posix_block_lock() is no longer needed. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index be21e860a9f..b01482c721a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -757,7 +757,6 @@ extern void locks_remove_flock(struct file *); extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); -extern void posix_block_lock(struct file_lock *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); -- cgit v1.2.3 From 8dc7c3115b611c00006eac3ee5b108296432aab7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: locks,lockd: fix race in nlmsvc_testlock posix_test_lock() returns a pointer to a struct file_lock which is unprotected and can be removed while in use by the caller. Move the conflicting lock from the return to a parameter, and copy the conflicting lock. In most cases the caller ends up putting the copy of the conflicting lock on the stack. On i386, sizeof(struct file_lock) appears to be about 100 bytes. We're assuming that's reasonable. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b01482c721a..8ef4dd788a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -754,7 +754,7 @@ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); +extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From 7117bf3dfb10b534a017260d9fc643bc1d0afd2a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 13:44:26 -0500 Subject: lockd: Remove FL_LOCKD flag Currently lockd identifies its own locks using the FL_LOCKD flag. This doesn't scale well to multiple lock managers--if we did this in nfsv4 too, for example, we'd be left with only one free flag bit. Instead, we just check whether the file manager ops (fl_lmops) set on this lock are our own. The only use for this is in nlm_traverse_locks, which uses it to find locks that need cleaning up when freeing a host or a file. In the long run it might be nice to do reference counting instead of traversing all the locks like this.... Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ef4dd788a8..d2cffee8fc1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -667,7 +667,6 @@ extern spinlock_t files_lock; #define FL_POSIX 1 #define FL_FLOCK 2 #define FL_ACCESS 8 /* not trying to lock, just looking */ -#define FL_LOCKD 16 /* lock held by rpc.lockd */ #define FL_LEASE 32 /* lease held on this file */ #define FL_SLEEP 128 /* A blocking lock */ -- cgit v1.2.3 From 788e7a89a03e364855583c0ab4649b94925efbb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS write code in preparation for asynchronous o_direct This patch inverts the callback hierarchy for NFS write calls. Instead of having the NFSv2/v3/v4-specific code set up the RPC callback ops, we allow the original caller to do so. This allows for more flexibility w.r.t. how to set up and tear down the nfs_write_data structure while still allowing the NFSv3/v4 code to perform error handling. The greater flexibility is needed by the asynchronous O_DIRECT code, which wants to be able to hold on to the original nfs_write_data structures after the WRITE RPC call has completed in order to be able to replay them if the COMMIT call determines that the server has rebooted. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 7 ------- include/linux/nfs_xdr.h | 3 ++- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b71da4d4b13..782e5976569 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,13 +407,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *task, void *data); -extern void nfs_writedata_release(void *data); - -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern void nfs_commit_done(struct rpc_task *, void *data); -extern void nfs_commit_release(void *data); -#endif /* * Try to write back everything synchronously (but check the diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6d6f69ec567..277750cc70c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -714,7 +714,6 @@ struct nfs_write_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif - void (*complete) (struct nfs_write_data *, int); struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; @@ -770,7 +769,9 @@ struct nfs_rpc_ops { u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); + int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, int how); + int (*commit_done) (struct rpc_task *, struct nfs_write_data *); int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); -- cgit v1.2.3 From ec06c096edec0755534c7126f4caded69de131c2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:27 -0500 Subject: NFS: Cleanup of NFS read code Same callback hierarchy inversion as for the NFS write calls. This patch is not strictly speaking needed by the O_DIRECT code, but avoids confusing differences between the asynchronous read and write code. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_xdr.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 782e5976569..f55827be4f8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -492,8 +492,8 @@ static inline void nfs_writedata_free(struct nfs_write_data *p) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern void nfs_readpage_result(struct rpc_task *, void *); -extern void nfs_readdata_release(void *data); +extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); +extern void nfs_readdata_release(void *data); /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 277750cc70c..7fafc4c546b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -695,7 +695,6 @@ struct nfs_read_data { #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif - void (*complete) (struct nfs_read_data *, int); struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; @@ -768,6 +767,7 @@ struct nfs_rpc_ops { struct nfs_pathconf *); u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); + int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, int how); -- cgit v1.2.3 From 462d5b3296b56289efec426499a83faad4c08d9e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 20 Mar 2006 13:44:32 -0500 Subject: NFS: make direct write path generate write requests concurrently Duplicate infrastructure from direct read path that will allow write path to generate multiple write requests concurrently. This will enable us to add support for aio in this path. Temporarily we will lose the ability to do UNSTABLE writes followed by a COMMIT in the direct write path. However, all applications I am aware of that use NFS O_DIRECT currently write in relatively small chunks, so this should not be inconvenient in any way. Test plan: Millions of fsx-odirect ops. OraSim. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f55827be4f8..6c130a6b0f4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,6 +407,8 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); +extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); +extern void nfs_writedata_release(void *); /* * Try to write back everything synchronously (but check the -- cgit v1.2.3 From e17b1fc4b35399935f00a635206e183d9292fe4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:35 -0500 Subject: NFS: Make nfs_commit_alloc() extern We need to use nfs_commit_alloc() in fs/nfs/direct.c. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6c130a6b0f4..423f202b881 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -410,6 +410,11 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); +#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount); +void nfs_commit_free(struct nfs_write_data *p); +#endif + /* * Try to write back everything synchronously (but check the * return value!) -- cgit v1.2.3 From fad61490419b3e494f300e9b2579810ef3bcda31 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:36 -0500 Subject: nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not necessary. This simplifies the internal logic, but this could be a difficult workload for some servers. Instead, let's send UNSTABLE writes, and after they all complete, send a COMMIT for the dirty range. After the COMMIT returns successfully, then do the wake_up or fire off aio_complete(). Test plan: Async direct I/O tests against Solaris (or any server that requires committed unstable writes). Reboot server during test. Based on an earlier patch by Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 423f202b881..9f84c8a5ea4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -422,6 +422,7 @@ void nfs_commit_free(struct nfs_write_data *p); extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); +extern void nfs_commit_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) -- cgit v1.2.3 From 3feb2d49394b7874348a6e43c076b780c1d222c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:37 -0500 Subject: NFS: Uninline nfs_writedata_(alloc|free) and nfs_readdata_(alloc|free) Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 67 +++----------------------------------------------- 1 file changed, 4 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9f84c8a5ea4..55de0770df4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -462,37 +462,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) /* * Allocate and free nfs_write_data structures */ -extern mempool_t *nfs_wdata_mempool; - -static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) -{ - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); - - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; - else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } - } - } - return p; -} - -static inline void nfs_writedata_free(struct nfs_write_data *p) -{ - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_wdata_mempool); -} +extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount); +extern void nfs_writedata_free(struct nfs_write_data *p); /* * linux/fs/nfs/read.c @@ -503,41 +474,11 @@ extern int nfs_readpages(struct file *, struct address_space *, extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); extern void nfs_readdata_release(void *data); - /* * Allocate and free nfs_read_data structures */ -extern mempool_t *nfs_rdata_mempool; - -static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) -{ - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); - - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - if (pagecount < NFS_PAGEVEC_SIZE) - p->pagevec = &p->page_array[0]; - else { - size_t size = ++pagecount * sizeof(struct page *); - p->pagevec = kmalloc(size, GFP_NOFS); - if (p->pagevec) { - memset(p->pagevec, 0, size); - } else { - mempool_free(p, nfs_rdata_mempool); - p = NULL; - } - } - } - return p; -} - -static inline void nfs_readdata_free(struct nfs_read_data *p) -{ - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_rdata_mempool); -} +extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount); +extern void nfs_readdata_free(struct nfs_read_data *p); /* * linux/fs/nfs3proc.c -- cgit v1.2.3 From 6849c0cab69f5d1a0fc7b05fa5bfb3dec53f86df Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Add refcounting to struct nlm_block Otherwise, the block may disappear from underneath us when in nlmsvc_retry_blocked. Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ef21ed29603..08ab9773f76 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ struct nlm_file { */ #define NLM_NEVER (~(unsigned long) 0) struct nlm_block { + struct kref b_count; /* Reference count */ struct nlm_block * b_next; /* linked list (all blocks) */ struct nlm_block * b_fnext; /* linked list (per file) */ struct nlm_rqst b_call; /* RPC args & callback info */ @@ -119,7 +121,6 @@ struct nlm_block { unsigned int b_id; /* block id */ unsigned char b_queued; /* re-queued */ unsigned char b_granted; /* VFS granted lock */ - unsigned char b_incall; /* doing callback */ unsigned char b_done; /* callback complete */ struct nlm_file * b_file; /* file in question */ }; -- cgit v1.2.3 From 5e1abf8cb713a0b94f5a400c7b9b797990cd9dec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:39 -0500 Subject: lockd: Clean up of the server-side GRANTED code Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 08ab9773f76..860a93f6ce6 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -153,8 +153,6 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout); u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); -int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *); -void nlmclnt_freegrantargs(struct nlm_rqst *); /* * Host cache -- cgit v1.2.3 From 26bcbf965f857c710adafd16cf424f043006b5dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Mar 2006 13:44:40 -0500 Subject: lockd: stop abusing file_lock_list Currently lockd directly access the file_lock_list from fs/locks.c. It does so to mark locks granted or reclaimable. This is very suboptimal, because a) lockd needs to poke into locks.c internals, and b) it needs to iterate over all locks in the system for marking locks granted or reclaimable. This patch adds lists for granted and reclaimable locks to the nlm_host structure instead, and adds locks to those. nlmclnt_lock: now adds the lock to h_granted instead of setting the NFS_LCK_GRANTED, still O(1) nlmclnt_mark_reclaim: goes away completely, replaced by a list_splice_init. Complexity reduced from O(locks in the system) to O(1) reclaimer: iterates over h_reclaim now, complexity reduced from O(locks in the system) to O(locks per nlm_host) Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 -- include/linux/lockd/lockd.h | 2 ++ include/linux/nfs_fs_i.h | 8 +------- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d2cffee8fc1..5dc0fa288a4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -730,8 +730,6 @@ struct file_lock { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -extern struct list_head file_lock_list; - #include extern int fcntl_getlk(struct file *, struct flock __user *); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 860a93f6ce6..b0f63b6ab0d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -59,6 +59,8 @@ struct nlm_host { unsigned long h_expires; /* eligible for GC */ struct list_head h_lockowners; /* Lockowners for the client */ spinlock_t h_lock; + struct list_head h_granted; /* Locks in GRANTED state */ + struct list_head h_reclaim; /* Locks in RECLAIM state */ }; /* diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h index e2c18dabff8..861730275ba 100644 --- a/include/linux/nfs_fs_i.h +++ b/include/linux/nfs_fs_i.h @@ -12,8 +12,8 @@ struct nlm_lockowner; */ struct nfs_lock_info { u32 state; - u32 flags; struct nlm_lockowner *owner; + struct list_head list; }; struct nfs4_lock_state; @@ -21,10 +21,4 @@ struct nfs4_lock_info { struct nfs4_lock_state *owner; }; -/* - * Lock flag values - */ -#define NFS_LCK_GRANTED 0x0001 /* lock has been granted */ -#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */ - #endif -- cgit v1.2.3 From 3a649b884637c4fdff50a6beebc3dc0e6082e048 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:44 -0500 Subject: NLM: Simplify client locks Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b0f63b6ab0d..cb9933d0409 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -86,7 +86,6 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ - struct nlm_wait * a_block; unsigned int a_retries; /* Retry count */ char a_owner[NLMCLNT_OHSIZE]; }; @@ -149,9 +148,9 @@ extern unsigned long nlmsvc_timeout; * Lockd client functions */ struct nlm_rqst * nlmclnt_alloc_call(void); -int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); -void nlmclnt_finish_block(struct nlm_rqst *req); -long nlmclnt_block(struct nlm_rqst *req, long timeout); +struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); +void nlmclnt_finish_block(struct nlm_wait *block); +int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); -- cgit v1.2.3 From 92737230dd3f1478033819d4bc20339f8da852da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: NLM: Add nlmclnt_release_call Add a helper function to simplify the freeing of NLM client requests. Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index cb9933d0409..e7ba8110d57 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -86,8 +86,9 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ + struct nlm_block * a_block; unsigned int a_retries; /* Retry count */ - char a_owner[NLMCLNT_OHSIZE]; + u8 a_owner[NLMCLNT_OHSIZE]; }; /* @@ -115,7 +116,7 @@ struct nlm_block { struct kref b_count; /* Reference count */ struct nlm_block * b_next; /* linked list (all blocks) */ struct nlm_block * b_fnext; /* linked list (per file) */ - struct nlm_rqst b_call; /* RPC args & callback info */ + struct nlm_rqst * b_call; /* RPC args & callback info */ struct svc_serv * b_daemon; /* NLM service */ struct nlm_host * b_host; /* host handle for RPC clnt */ unsigned long b_when; /* next re-xmit */ @@ -147,7 +148,9 @@ extern unsigned long nlmsvc_timeout; /* * Lockd client functions */ -struct nlm_rqst * nlmclnt_alloc_call(void); +struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); +void nlm_release_call(struct nlm_rqst *); +int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); @@ -172,7 +175,6 @@ extern struct nlm_host *nlm_find_client(void); /* * Server-side lock handling */ -int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_lock *, int, struct nlm_cookie *); u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); -- cgit v1.2.3 From d47166244860eb5dfdb12ee4703968beef8a0db2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:45 -0500 Subject: lockd: Add helper for *_RES callbacks Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index e7ba8110d57..a04137d0c5d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -151,6 +151,7 @@ extern unsigned long nlmsvc_timeout; struct nlm_rqst * nlm_alloc_call(struct nlm_host *host); void nlm_release_call(struct nlm_rqst *); int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); +int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); -- cgit v1.2.3 From 5428154827c2bf7cfdc9dab60db1e0eaa57c027a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:49 -0500 Subject: SUNRPC: Fix a 'Busy inodes' error in rpc_pipefs Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/rpc_pipe_fs.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index e37c06128e5..8fe9f35eba3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -60,6 +60,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ + struct vfsmount * cl_vfsmnt; struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 63929349571..2c2189cb30a 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -45,6 +45,8 @@ extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(char *); extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); extern int rpc_unlink(char *); +extern struct vfsmount *rpc_get_mount(void); +extern void rpc_put_mount(void); #endif #endif -- cgit v1.2.3 From c42de9dd67250fe984e0e31c9b542d721af6454b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 Mar 2006 13:44:51 -0500 Subject: NFS: Fix a race in nfs_sync_inode() Kudos to Neil Brown for spotting the problem: "in nfs_sync_inode, there is effectively the sequence: nfs_wait_on_requests nfs_flush_inode nfs_commit_inode This seems a bit racy to me as if the only requests are on the ->commit list, and nfs_commit_inode is called separately after nfs_wait_on_requests completes, and before nfs_commit_inode start (say: by nfs_write_inode) then none of these function will return >0, yet there will be some pending request that aren't waited for." The solution is to search for requests to wait upon, search for dirty requests, and search for uncommitted requests while holding the nfsi->req_lock The patch also cleans up nfs_sync_inode(), getting rid of the redundant FLUSH_WAIT flag. It turns out that we were always setting it. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 55de0770df4..cbebd7d1b9e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -56,9 +56,7 @@ * When flushing a cluster of dirty pages, there can be different * strategies: */ -#define FLUSH_AGING 0 /* only flush old buffers */ #define FLUSH_SYNC 1 /* file being synced, or contention */ -#define FLUSH_WAIT 2 /* wait for completion */ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ @@ -419,7 +417,7 @@ void nfs_commit_free(struct nfs_write_data *p); * Try to write back everything synchronously (but check the * return value!) */ -extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern void nfs_commit_release(void *wdata); @@ -440,7 +438,7 @@ nfs_have_writebacks(struct inode *inode) static inline int nfs_wb_all(struct inode *inode) { - int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); + int error = nfs_sync_inode_wait(inode, 0, 0, 0); return (error < 0) ? error : 0; } @@ -449,8 +447,8 @@ nfs_wb_all(struct inode *inode) */ static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) { - int error = nfs_sync_inode(inode, page->index, 1, - how | FLUSH_WAIT | FLUSH_STABLE); + int error = nfs_sync_inode_wait(inode, page->index, 1, + how | FLUSH_STABLE); return (error < 0) ? error : 0; } -- cgit v1.2.3 From b63862f46547487388e582e8ac9083830d34f058 Mon Sep 17 00:00:00 2001 From: Dustin Kirkland Date: Thu, 3 Nov 2005 15:41:46 +0000 Subject: [PATCH] Filter rule comparators Currently, audit only supports the "=" and "!=" operators in the -F filter rules. This patch reworks the support for "=" and "!=", and adds support for ">", ">=", "<", and "<=". This turned out to be a pretty clean, and simply process. I ended up using the high order bits of the "field", as suggested by Steve and Amy. This allowed for no changes whatsoever to the netlink communications. See the documentation within the patch in the include/linux/audit.h area, where there is a table that explains the reasoning of the bitmask assignments clearly. The patch adds a new function, audit_comparator(left, op, right). This function will perform the specified comparison (op, which defaults to "==" for backward compatibility) between two values (left and right). If the negate bit is on, it will negate whatever that result was. This value is returned. Signed-off-by: Dustin Kirkland Signed-off-by: David Woodhouse --- include/linux/audit.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index da3c01955f3..2408cb77899 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -98,6 +98,13 @@ #define AUDIT_WORD(nr) ((__u32)((nr)/32)) #define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) +/* This bitmask is used to validate user input. It represents all bits that + * are currently used in an audit field constant understood by the kernel. + * If you are adding a new #define AUDIT_, please ensure that + * AUDIT_UNUSED_BITS is updated if need be. */ +#define AUDIT_UNUSED_BITS 0x0FFFFC00 + + /* Rule fields */ /* These are useful when checking the * task structure at task creation time @@ -128,8 +135,28 @@ #define AUDIT_ARG2 (AUDIT_ARG0+2) #define AUDIT_ARG3 (AUDIT_ARG0+3) -#define AUDIT_NEGATE 0x80000000 +#define AUDIT_NEGATE 0x80000000 +/* These are the supported operators. + * 4 2 1 + * = > < + * ------- + * 0 0 0 0 nonsense + * 0 0 1 1 < + * 0 1 0 2 > + * 0 1 1 3 != + * 1 0 0 4 = + * 1 0 1 5 <= + * 1 1 0 6 >= + * 1 1 1 7 all operators + */ +#define AUDIT_LESS_THAN 0x10000000 +#define AUDIT_GREATER_THAN 0x20000000 +#define AUDIT_NOT_EQUAL 0x30000000 +#define AUDIT_EQUAL 0x40000000 +#define AUDIT_LESS_THAN_OR_EQUAL (AUDIT_LESS_THAN|AUDIT_EQUAL) +#define AUDIT_GREATER_THAN_OR_EQUAL (AUDIT_GREATER_THAN|AUDIT_EQUAL) +#define AUDIT_OPERATORS (AUDIT_EQUAL|AUDIT_NOT_EQUAL) /* Status symbols */ /* Mask values */ -- cgit v1.2.3 From 90d526c074ae5db484388da56c399acf892b6c17 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Thu, 3 Nov 2005 15:48:08 +0000 Subject: [PATCH] Define new range of userspace messages. The attached patch updates various items for the new user space messages. Please apply. Signed-off-by: Steve Grubb Signed-off-by: David Woodhouse --- include/linux/audit.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2408cb77899..fd65078e794 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -33,11 +33,20 @@ * 1200 - 1299 messages internal to the audit daemon * 1300 - 1399 audit event messages * 1400 - 1499 SE Linux use - * 1500 - 1999 future use - * 2000 is for otherwise unclassified kernel audit messages + * 1500 - 1599 kernel LSPP events + * 1600 - 1699 kernel crypto events + * 1700 - 1999 future kernel use (maybe integrity labels and related events) + * 2000 is for otherwise unclassified kernel audit messages (legacy) + * 2001 - 2099 unused (kernel) + * 2100 - 2199 user space anomaly records + * 2200 - 2299 user space actions taken in response to anomalies + * 2300 - 2399 user space generated LSPP events + * 2400 - 2499 user space crypto events + * 2500 - 2999 future user space (maybe integrity labels and related events) * - * Messages from 1000-1199 are bi-directional. 1200-1299 are exclusively user - * space. Anything over that is kernel --> user space communication. + * Messages from 1000-1199 are bi-directional. 1200-1299 & 2100 - 2999 are + * exclusively user space. 1300-2099 is kernel --> user space + * communication. */ #define AUDIT_GET 1000 /* Get status */ #define AUDIT_SET 1001 /* Set status (enable/disable/auditd) */ @@ -54,6 +63,8 @@ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ #define AUDIT_LAST_USER_MSG 1199 +#define AUDIT_FIRST_USER_MSG2 2100 /* More user space messages */ +#define AUDIT_LAST_USER_MSG2 2999 #define AUDIT_DAEMON_START 1200 /* Daemon startup record */ #define AUDIT_DAEMON_END 1201 /* Daemon normal stop record */ -- cgit v1.2.3 From f38aa94224c5517a40ba56d453779f70d3229803 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 3 Nov 2005 15:57:06 +0000 Subject: [PATCH] Pass dentry, not just name, in fsnotify creation hooks. The audit hooks (to be added shortly) will want to see dentry->d_inode too, not just the name. Signed-off-by: Amy Griffis Signed-off-by: David Woodhouse --- include/linux/fsnotify.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 03b8e7932b8..b5ff64d2f09 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -70,19 +70,20 @@ static inline void fsnotify_inoderemove(struct inode *inode) /* * fsnotify_create - 'name' was linked in */ -static inline void fsnotify_create(struct inode *inode, const char *name) +static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE, 0, name); + inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name); } /* * fsnotify_mkdir - directory 'name' was created */ -static inline void fsnotify_mkdir(struct inode *inode, const char *name) +static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, name); + inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, + dentry->d_name.name); } /* -- cgit v1.2.3 From 73241ccca0f7786933f1d31b3d86f2456549953a Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 3 Nov 2005 16:00:25 +0000 Subject: [PATCH] Collect more inode information during syscall processing. This patch augments the collection of inode info during syscall processing. It represents part of the functionality that was provided by the auditfs patch included in RHEL4. Specifically, it: - Collects information for target inodes created or removed during syscalls. Previous code only collects information for the target inode's parent. - Adds the audit_inode() hook to syscalls that operate on a file descriptor (e.g. fchown), enabling audit to do inode filtering for these calls. - Modifies filtering code to check audit context for either an inode # or a parent inode # matching a given rule. - Modifies logging to provide inode # for both parent and child. - Protect debug info from NULL audit_names.name. [AV: folded a later typo fix from the same author] Signed-off-by: Amy Griffis Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- include/linux/audit.h | 18 +++++++++++++++++- include/linux/fsnotify.h | 5 +++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index fd65078e794..739b954cb24 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -260,7 +260,20 @@ extern void audit_syscall_entry(struct task_struct *task, int arch, extern void audit_syscall_exit(struct task_struct *task, int failed, long return_code); extern void audit_getname(const char *name); extern void audit_putname(const char *name); -extern void audit_inode(const char *name, const struct inode *inode, unsigned flags); +extern void __audit_inode(const char *name, const struct inode *inode, unsigned flags); +extern void __audit_inode_child(const char *dname, const struct inode *inode, + unsigned long pino); +static inline void audit_inode(const char *name, const struct inode *inode, + unsigned flags) { + if (unlikely(current->audit_context)) + __audit_inode(name, inode, flags); +} +static inline void audit_inode_child(const char *dname, + const struct inode *inode, + unsigned long pino) { + if (unlikely(current->audit_context)) + __audit_inode_child(dname, inode, pino); +} /* Private API (for audit.c only) */ extern int audit_receive_filter(int type, int pid, int uid, int seq, @@ -283,7 +296,10 @@ extern int audit_filter_user(struct netlink_skb_parms *cb, int type); #define audit_syscall_exit(t,f,r) do { ; } while (0) #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) +#define __audit_inode(n,i,f) do { ; } while (0) +#define __audit_inode_child(d,i,p) do { ; } while (0) #define audit_inode(n,i,f) do { ; } while (0) +#define audit_inode_child(d,i,p) do { ; } while (0) #define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b5ff64d2f09..94919c376a7 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -15,6 +15,7 @@ #include #include +#include /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir @@ -45,6 +46,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL); } + audit_inode_child(old_name, source, old_dir->i_ino); + audit_inode_child(new_name, target, new_dir->i_ino); } /* @@ -74,6 +77,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name); + audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); } /* @@ -84,6 +88,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, dentry->d_name.name); + audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); } /* -- cgit v1.2.3 From c8edc80c8b8c397c53f4f659a05b9ea6208029bf Mon Sep 17 00:00:00 2001 From: Dustin Kirkland Date: Thu, 3 Nov 2005 16:12:36 +0000 Subject: [PATCH] Exclude messages by message type - Add a new, 5th filter called "exclude". - And add a new field AUDIT_MSGTYPE. - Define a new function audit_filter_exclude() that takes a message type as input and examines all rules in the filter. It returns '1' if the message is to be excluded, and '0' otherwise. - Call the audit_filter_exclude() function near the top of audit_log_start() just after asserting audit_initialized. If the message type is not to be audited, return NULL very early, before doing a lot of work. [combined with followup fix for bug in original patch, Nov 4, same author] [combined with later renaming AUDIT_FILTER_EXCLUDE->AUDIT_FILTER_TYPE and audit_filter_exclude() -> audit_filter_type()] Signed-off-by: Dustin Kirkland Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- include/linux/audit.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 739b954cb24..8fa1a8fbc04 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -92,8 +92,9 @@ #define AUDIT_FILTER_ENTRY 0x02 /* Apply rule at syscall entry */ #define AUDIT_FILTER_WATCH 0x03 /* Apply rule to file system watches */ #define AUDIT_FILTER_EXIT 0x04 /* Apply rule at syscall exit */ +#define AUDIT_FILTER_TYPE 0x05 /* Apply rule at audit_log_start */ -#define AUDIT_NR_FILTERS 5 +#define AUDIT_NR_FILTERS 6 #define AUDIT_FILTER_PREPEND 0x10 /* Prepend to front of list */ @@ -132,6 +133,7 @@ #define AUDIT_LOGINUID 9 #define AUDIT_PERS 10 #define AUDIT_ARCH 11 +#define AUDIT_MSGTYPE 12 /* These are ONLY useful when checking * at syscall exit time (AUDIT_AT_EXIT). */ @@ -289,6 +291,7 @@ extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); extern int audit_filter_user(struct netlink_skb_parms *cb, int type); +extern int audit_filter_type(int type); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) -- cgit v1.2.3 From 8c8570fb8feef2bc166bee75a85748b25cda22d9 Mon Sep 17 00:00:00 2001 From: Dustin Kirkland Date: Thu, 3 Nov 2005 17:15:16 +0000 Subject: [PATCH] Capture selinux subject/object context information. This patch extends existing audit records with subject/object context information. Audit records associated with filesystem inodes, ipc, and tasks now contain SELinux label information in the field "subj" if the item is performing the action, or in "obj" if the item is the receiver of an action. These labels are collected via hooks in SELinux and appended to the appropriate record in the audit code. This additional information is required for Common Criteria Labeled Security Protection Profile (LSPP). [AV: fixed kmalloc flags use] [folded leak fixes] [folded cleanup from akpm (kfree(NULL)] [folded audit_inode_context() leak fix] [folded akpm's fix for audit_ipc_perm() definition in case of !CONFIG_AUDIT] Signed-off-by: Dustin Kirkland Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/audit.h | 8 ++++++-- include/linux/security.h | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8fa1a8fbc04..1912d8e8ae9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -285,13 +285,14 @@ extern void auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); -extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); +extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp); extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); extern int audit_filter_user(struct netlink_skb_parms *cb, int type); extern int audit_filter_type(int type); +extern int audit_set_macxattr(const char *name); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) @@ -306,12 +307,13 @@ extern int audit_filter_type(int type); #define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) -#define audit_ipc_perms(q,u,g,m) ({ 0; }) +#define audit_ipc_perms(q,u,g,m,i) ({ 0; }) #define audit_socketcall(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) #define audit_filter_user(cb,t) ({ 1; }) +#define audit_set_macxattr(n) do { ; } while (0) #endif #ifdef CONFIG_AUDIT @@ -340,6 +342,7 @@ extern void audit_send_reply(int pid, int seq, int type, int done, int multi, void *payload, int size); extern void audit_log_lost(const char *message); +extern void audit_panic(const char *message); extern struct semaphore audit_netlink_sem; #else #define audit_log(c,g,t,f,...) do { ; } while (0) @@ -350,6 +353,7 @@ extern struct semaphore audit_netlink_sem; #define audit_log_hex(a,b,l) do { ; } while (0) #define audit_log_untrustedstring(a,s) do { ; } while (0) #define audit_log_d_path(b,p,d,v) do { ; } while (0) +#define audit_panic(m) do { ; } while (0) #endif #endif #endif diff --git a/include/linux/security.h b/include/linux/security.h index 7cbef482e13..ec0bbbc3ffc 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -869,6 +869,11 @@ struct swap_info_struct; * @ipcp contains the kernel IPC permission structure * @flag contains the desired (requested) permission set * Return 0 if permission is granted. + * @ipc_getsecurity: + * Copy the security label associated with the ipc object into + * @buffer. @buffer may be NULL to request the size of the buffer + * required. @size indicates the size of @buffer in bytes. Return + * number of bytes used/required on success. * * Security hooks for individual messages held in System V IPC message queues * @msg_msg_alloc_security: @@ -1168,6 +1173,7 @@ struct security_operations { int (*inode_getxattr) (struct dentry *dentry, char *name); int (*inode_listxattr) (struct dentry *dentry); int (*inode_removexattr) (struct dentry *dentry, char *name); + char *(*inode_xattr_getsuffix) (void); int (*inode_getsecurity)(struct inode *inode, const char *name, void *buffer, size_t size, int err); int (*inode_setsecurity)(struct inode *inode, const char *name, const void *value, size_t size, int flags); int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size); @@ -1217,6 +1223,7 @@ struct security_operations { void (*task_to_inode)(struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag); + int (*ipc_getsecurity)(struct kern_ipc_perm *ipcp, void *buffer, size_t size); int (*msg_msg_alloc_security) (struct msg_msg * msg); void (*msg_msg_free_security) (struct msg_msg * msg); @@ -1674,6 +1681,11 @@ static inline int security_inode_removexattr (struct dentry *dentry, char *name) return security_ops->inode_removexattr (dentry, name); } +static inline const char *security_inode_xattr_getsuffix(void) +{ + return security_ops->inode_xattr_getsuffix(); +} + static inline int security_inode_getsecurity(struct inode *inode, const char *name, void *buffer, size_t size, int err) { if (unlikely (IS_PRIVATE (inode))) @@ -1869,6 +1881,11 @@ static inline int security_ipc_permission (struct kern_ipc_perm *ipcp, return security_ops->ipc_permission (ipcp, flag); } +static inline int security_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size) +{ + return security_ops->ipc_getsecurity(ipcp, buffer, size); +} + static inline int security_msg_msg_alloc (struct msg_msg * msg) { return security_ops->msg_msg_alloc_security (msg); @@ -2316,6 +2333,11 @@ static inline int security_inode_removexattr (struct dentry *dentry, char *name) return cap_inode_removexattr(dentry, name); } +static inline const char *security_inode_xattr_getsuffix (void) +{ + return NULL ; +} + static inline int security_inode_getsecurity(struct inode *inode, const char *name, void *buffer, size_t size, int err) { return -EOPNOTSUPP; @@ -2499,6 +2521,11 @@ static inline int security_ipc_permission (struct kern_ipc_perm *ipcp, return 0; } +static inline int security_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size) +{ + return -EOPNOTSUPP; +} + static inline int security_msg_msg_alloc (struct msg_msg * msg) { return 0; -- cgit v1.2.3 From 7306a0b9b3e2056a616c84841288ca2431a05627 Mon Sep 17 00:00:00 2001 From: Dustin Kirkland Date: Wed, 16 Nov 2005 15:53:13 +0000 Subject: [PATCH] Miscellaneous bug and warning fixes This patch fixes a couple of bugs revealed in new features recently added to -mm1: * fixes warnings due to inconsistent use of const struct inode *inode * fixes bug that prevent a kernel from booting with audit on, and SELinux off due to a missing function in security/dummy.c * fixes a bug that throws spurious audit_panic() messages due to a missing return just before an error_path label * some reasonable house cleaning in audit_ipc_context(), audit_inode_context(), and audit_log_task_context() Signed-off-by: Dustin Kirkland Signed-off-by: David Woodhouse --- include/linux/security.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index ec0bbbc3ffc..2a502250eb5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1173,8 +1173,8 @@ struct security_operations { int (*inode_getxattr) (struct dentry *dentry, char *name); int (*inode_listxattr) (struct dentry *dentry); int (*inode_removexattr) (struct dentry *dentry, char *name); - char *(*inode_xattr_getsuffix) (void); - int (*inode_getsecurity)(struct inode *inode, const char *name, void *buffer, size_t size, int err); + const char *(*inode_xattr_getsuffix) (void); + int (*inode_getsecurity)(const struct inode *inode, const char *name, void *buffer, size_t size, int err); int (*inode_setsecurity)(struct inode *inode, const char *name, const void *value, size_t size, int flags); int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size); @@ -1686,7 +1686,7 @@ static inline const char *security_inode_xattr_getsuffix(void) return security_ops->inode_xattr_getsuffix(); } -static inline int security_inode_getsecurity(struct inode *inode, const char *name, void *buffer, size_t size, int err) +static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err) { if (unlikely (IS_PRIVATE (inode))) return 0; @@ -2338,7 +2338,7 @@ static inline const char *security_inode_xattr_getsuffix (void) return NULL ; } -static inline int security_inode_getsecurity(struct inode *inode, const char *name, void *buffer, size_t size, int err) +static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void *buffer, size_t size, int err) { return -EOPNOTSUPP; } -- cgit v1.2.3 From fe7752bab26a9ac0651b695ad4f55659761f68f7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Dec 2005 18:33:52 +0000 Subject: [PATCH] Fix audit record filtering with !CONFIG_AUDITSYSCALL This fixes the per-user and per-message-type filtering when syscall auditing isn't enabled. [AV: folded followup fix from the same author] Signed-off-by: David Woodhouse Signed-off-by: Al Viro --- include/linux/audit.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 1912d8e8ae9..fbc21d6267f 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -278,8 +278,6 @@ static inline void audit_inode_child(const char *dname, } /* Private API (for audit.c only) */ -extern int audit_receive_filter(int type, int pid, int uid, int seq, - void *data, uid_t loginuid); extern unsigned int audit_serial(void); extern void auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); @@ -290,8 +288,6 @@ extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); -extern int audit_filter_user(struct netlink_skb_parms *cb, int type); -extern int audit_filter_type(int type); extern int audit_set_macxattr(const char *name); #else #define audit_alloc(t) ({ 0; }) @@ -304,7 +300,6 @@ extern int audit_set_macxattr(const char *name); #define __audit_inode_child(d,i,p) do { ; } while (0) #define audit_inode(n,i,f) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) -#define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_perms(q,u,g,m,i) ({ 0; }) @@ -312,7 +307,6 @@ extern int audit_set_macxattr(const char *name); #define audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) -#define audit_filter_user(cb,t) ({ 1; }) #define audit_set_macxattr(n) do { ; } while (0) #endif @@ -337,13 +331,11 @@ extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, struct dentry *dentry, struct vfsmount *vfsmnt); - /* Private API (for auditsc.c only) */ -extern void audit_send_reply(int pid, int seq, int type, - int done, int multi, - void *payload, int size); -extern void audit_log_lost(const char *message); -extern void audit_panic(const char *message); -extern struct semaphore audit_netlink_sem; + /* Private API (for audit.c only) */ +extern int audit_filter_user(struct netlink_skb_parms *cb, int type); +extern int audit_filter_type(int type); +extern int audit_receive_filter(int type, int pid, int uid, int seq, + void *data, uid_t loginuid); #else #define audit_log(c,g,t,f,...) do { ; } while (0) #define audit_log_start(c,g,t) ({ NULL; }) -- cgit v1.2.3 From af601e4623d0303bfafa54ec728b7ae8493a8e1b Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Wed, 4 Jan 2006 14:08:39 +0000 Subject: [PATCH] SE Linux audit events Attached is a patch that hardwires important SE Linux events to the audit system. Please Apply. Signed-off-by: Steve Grubb Acked-by: Stephen Smalley Signed-off-by: David Woodhouse --- include/linux/audit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index fbc21d6267f..8868c96ca8a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -83,6 +83,9 @@ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ #define AUDIT_AVC_PATH 1402 /* dentry, vfsmount pair from avc */ +#define AUDIT_MAC_POLICY_LOAD 1403 /* Policy file load */ +#define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ +#define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ #define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ -- cgit v1.2.3 From 93315ed6dd12dacfc941f9eb8ca0293aadf99793 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Tue, 7 Feb 2006 12:05:27 -0500 Subject: [PATCH] audit string fields interface + consumer Updated patch to dynamically allocate audit rule fields in kernel's internal representation. Added unlikely() calls for testing memory allocation result. Amy Griffis wrote: [Wed Jan 11 2006, 02:02:31PM EST] > Modify audit's kernel-userspace interface to allow the specification > of string fields in audit rules. > > Signed-off-by: Amy Griffis Signed-off-by: Al Viro (cherry picked from 5ffc4a863f92351b720fe3e9c5cd647accff9e03 commit) --- include/linux/audit.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8868c96ca8a..8a3b98175c2 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -50,15 +50,18 @@ */ #define AUDIT_GET 1000 /* Get status */ #define AUDIT_SET 1001 /* Set status (enable/disable/auditd) */ -#define AUDIT_LIST 1002 /* List syscall filtering rules */ -#define AUDIT_ADD 1003 /* Add syscall filtering rule */ -#define AUDIT_DEL 1004 /* Delete syscall filtering rule */ +#define AUDIT_LIST 1002 /* List syscall rules -- deprecated */ +#define AUDIT_ADD 1003 /* Add syscall rule -- deprecated */ +#define AUDIT_DEL 1004 /* Delete syscall rule -- deprecated */ #define AUDIT_USER 1005 /* Message from userspace -- deprecated */ #define AUDIT_LOGIN 1006 /* Define the login id and information */ #define AUDIT_WATCH_INS 1007 /* Insert file/dir watch entry */ #define AUDIT_WATCH_REM 1008 /* Remove file/dir watch entry */ #define AUDIT_WATCH_LIST 1009 /* List all file/dir watches */ #define AUDIT_SIGNAL_INFO 1010 /* Get info about sender of signal to auditd */ +#define AUDIT_ADD_RULE 1011 /* Add syscall filtering rule */ +#define AUDIT_DEL_RULE 1012 /* Delete syscall filtering rule */ +#define AUDIT_LIST_RULES 1013 /* List syscall filtering rules */ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ @@ -229,6 +232,26 @@ struct audit_status { __u32 backlog; /* messages waiting in queue */ }; +/* audit_rule_data supports filter rules with both integer and string + * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and + * AUDIT_LIST_RULES requests. + */ +struct audit_rule_data { + __u32 flags; /* AUDIT_PER_{TASK,CALL}, AUDIT_PREPEND */ + __u32 action; /* AUDIT_NEVER, AUDIT_POSSIBLE, AUDIT_ALWAYS */ + __u32 field_count; + __u32 mask[AUDIT_BITMASK_SIZE]; + __u32 fields[AUDIT_MAX_FIELDS]; + __u32 values[AUDIT_MAX_FIELDS]; + __u32 fieldflags[AUDIT_MAX_FIELDS]; + __u32 buflen; /* total length of string fields */ + char buf[0]; /* string fields buffer */ +}; + +/* audit_rule is supported to maintain backward compatibility with + * userspace. It supports integer fields only and corresponds to + * AUDIT_ADD, AUDIT_DEL and AUDIT_LIST requests. + */ struct audit_rule { /* for AUDIT_LIST, AUDIT_ADD, and AUDIT_DEL */ __u32 flags; /* AUDIT_PER_{TASK,CALL}, AUDIT_PREPEND */ __u32 action; /* AUDIT_NEVER, AUDIT_POSSIBLE, AUDIT_ALWAYS */ @@ -338,7 +361,7 @@ extern void audit_log_d_path(struct audit_buffer *ab, extern int audit_filter_user(struct netlink_skb_parms *cb, int type); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, - void *data, uid_t loginuid); + void *data, size_t datasz, uid_t loginuid); #else #define audit_log(c,g,t,f,...) do { ; } while (0) #define audit_log_start(c,g,t) ({ NULL; }) -- cgit v1.2.3 From 5d3301088f7e412992d9e61cc3604cbdff3090ff Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Mon, 9 Jan 2006 09:48:17 -0500 Subject: [PATCH] add/remove rule update Hi, The following patch adds a little more information to the add/remove rule message emitted by the kernel. Signed-off-by: Steve Grubb Signed-off-by: Al Viro --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8a3b98175c2..d760430c8de 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -240,7 +240,7 @@ struct audit_rule_data { __u32 flags; /* AUDIT_PER_{TASK,CALL}, AUDIT_PREPEND */ __u32 action; /* AUDIT_NEVER, AUDIT_POSSIBLE, AUDIT_ALWAYS */ __u32 field_count; - __u32 mask[AUDIT_BITMASK_SIZE]; + __u32 mask[AUDIT_BITMASK_SIZE]; /* syscall(s) affected */ __u32 fields[AUDIT_MAX_FIELDS]; __u32 values[AUDIT_MAX_FIELDS]; __u32 fieldflags[AUDIT_MAX_FIELDS]; -- cgit v1.2.3 From 5bdb98868062c1b14025883049551af343233187 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Sat, 3 Dec 2005 08:39:35 -0500 Subject: [PATCH] promiscuous mode Hi, When a network interface goes into promiscuous mode, its an important security issue. The attached patch is intended to capture that action and send an event to the audit system. The patch carves out a new block of numbers for kernel detected anomalies. These are events that may indicate suspicious activity. Other examples of potential kernel anomalies would be: exceeding disk quota, rlimit violations, changes to syscall entry table. Signed-off-by: Steve Grubb Signed-off-by: Al Viro --- include/linux/audit.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index d760430c8de..1c47c59058c 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -35,7 +35,8 @@ * 1400 - 1499 SE Linux use * 1500 - 1599 kernel LSPP events * 1600 - 1699 kernel crypto events - * 1700 - 1999 future kernel use (maybe integrity labels and related events) + * 1700 - 1799 kernel anomaly records + * 1800 - 1999 future kernel use (maybe integrity labels and related events) * 2000 is for otherwise unclassified kernel audit messages (legacy) * 2001 - 2099 unused (kernel) * 2100 - 2199 user space anomaly records @@ -90,6 +91,10 @@ #define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ #define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ +#define AUDIT_FIRST_KERN_ANOM_MSG 1700 +#define AUDIT_LAST_KERN_ANOM_MSG 1799 +#define AUDIT_ANOM_PROMISCUOUS 1700 /* Device changed promiscuous mode */ + #define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. */ /* Rule flags */ -- cgit v1.2.3 From eaa82edf20d738a7ae31f4b0a5f72f64c14a58df Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:04 -0500 Subject: SUNRPC,RPCSEC_GSS: fix krb5 sequence numbers. Use a spinlock to ensure unique sequence numbers when creating krb5 gss tokens. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 2c3601d3104..1279280d719 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -53,6 +53,8 @@ struct krb5_ctx { struct xdr_netobj mech_used; }; +extern spinlock_t krb5_seq_lock; + #define KG_TOK_MIC_MSG 0x0101 #define KG_TOK_WRAP_MSG 0x0201 -- cgit v1.2.3 From f3ee439f43381e45b191cf721b4a51d41f33301f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:13 -0500 Subject: LOCKD: nlmsvc_traverse_blocks return is unused Note that we never return non-zero. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index a04137d0c5d..995f89dc8c0 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -183,7 +183,7 @@ u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *, struct nlm_lock *); u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); unsigned long nlmsvc_retry_blocked(void); -int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, +void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, int action); void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32); -- cgit v1.2.3 From 5f12191bc000ea31970339a5f54c11087506711c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 20 Mar 2006 23:24:25 -0500 Subject: LOCKD: Make nlmsvc_traverse_shares return void The nlmsvc_traverse_shares return value is always zero, hence useless. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/lockd/share.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h index 5d8aa325f14..c75a424ebe4 100644 --- a/include/linux/lockd/share.h +++ b/include/linux/lockd/share.h @@ -25,6 +25,6 @@ u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); -int nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); +void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); #endif /* LINUX_LOCKD_SHARE_H */ -- cgit v1.2.3 From 13fce8062968996da496d4f65cc1c1f845704604 Mon Sep 17 00:00:00 2001 From: Andrzej Zaborowski Date: Fri, 24 Mar 2006 18:13:37 +0100 Subject: Fix simple typos This corrects some trivial errors in ARM docs and comments, Signed-off-by: Adrian Bunk --- include/linux/timer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 9b9877fd250..ee5a09e806e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -69,13 +69,13 @@ extern unsigned long next_timer_interrupt(void); * @timer: the timer to be added * * The kernel will do a ->function(->data) callback from the - * timer interrupt at the ->expired point in the future. The + * timer interrupt at the ->expires point in the future. The * current time is 'jiffies'. * - * The timer's ->expired, ->function (and if the handler uses it, ->data) + * The timer's ->expires, ->function (and if the handler uses it, ->data) * fields must be set prior calling this function. * - * Timers with an ->expired field in the past will be executed in the next + * Timers with an ->expires field in the past will be executed in the next * timer tick. */ static inline void add_timer(struct timer_list *timer) -- cgit v1.2.3 From 301e22d69140898eddd38a9134da711cb5dfc170 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 18 Mar 2006 17:15:00 -0300 Subject: V4L/DVB (3584): Implement V4L2_TUNER_MODE_LANG1_LANG2 audio mode Add a new audio mode V4L2_TUNER_MODE_LANG1_LANG2 (used by VIDIOC_G/S_TUNER). This mode allows the user to select both languages of a bilingual transmission, one language on the left, one on the right audio channel. If there is no bilingual transmission, or it is not supported, then this mode should act like V4L2_TUNER_MODE_STEREO. This mode is introduced for PVR-like drivers where it is useful to be able to record both languages of a bilingual broadcast. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 724cfbf54b8..2275bfec5b6 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -883,6 +883,7 @@ struct v4l2_modulator #define V4L2_TUNER_MODE_LANG2 0x0002 #define V4L2_TUNER_MODE_SAP 0x0002 #define V4L2_TUNER_MODE_LANG1 0x0003 +#define V4L2_TUNER_MODE_LANG1_LANG2 0x0004 struct v4l2_frequency { -- cgit v1.2.3 From a20c522498330ba0f4970a9bcd11890312277ae2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 23 Mar 2006 19:37:58 -0300 Subject: V4L/DVB (3598): Add bit algorithm adapter for the Conexant CX2341X boards. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/i2c-id.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index ec311bc8943..679b46a6a56 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -184,6 +184,7 @@ #define I2C_HW_B_SAVAGE 0x01001d /* savage framebuffer driver */ #define I2C_HW_B_RADEON 0x01001e /* radeon framebuffer driver */ #define I2C_HW_B_EM28XX 0x01001f /* em28xx video capture cards */ +#define I2C_HW_B_CX2341X 0x010020 /* Conexant CX2341X MPEG encoder cards */ /* --- PCF 8584 based algorithms */ #define I2C_HW_P_LP 0x020000 /* Parallel port interface */ -- cgit v1.2.3 From a1a8feed1743ec8d2af1dafa7c5321679f0a3e4f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 23 Mar 2006 22:07:34 -0800 Subject: [MODULES]: Don't allow statically declared exports Add an extern declaration for exported symbols to make the compiler warn on symbols declared statically. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/module.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 70bd843c71c..d9569151c18 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -183,6 +183,7 @@ void *__symbol_get_gpl(const char *symbol); /* For every exported symbol, place a struct in the __ksymtab section */ #define __EXPORT_SYMBOL(sym, sec) \ + extern typeof(sym) sym; \ __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ __attribute__((section("__ksymtab_strings"))) \ -- cgit v1.2.3 From c08b8a49100715b20e6f7c997e992428b5e06078 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Mar 2006 03:06:33 -0800 Subject: [PATCH] sys_alarm() unsigned signed conversion fixup alarm() calls the kernel with an unsigend int timeout in seconds. The value is stored in the tv_sec field of a struct timeval to setup the itimer. The tv_sec field of struct timeval is of type long, which causes the tv_sec value to be negative on 32 bit machines if seconds > INT_MAX. Before the hrtimer merge (pre 2.6.16) such a negative value was converted to the maximum jiffies timeout by the timeval_to_jiffies conversion. It's not clear whether this was intended or just happened to be done by the timeval_to_jiffies code. hrtimers expect a timeval in canonical form and treat a negative timeout as already expired. This breaks the legitimate usage of alarm() with a timeout value > INT_MAX seconds. For 32 bit machines it is therefor necessary to limit the internal seconds value to avoid API breakage. Instead of doing this in all implementations of sys_alarm the duplicated sys_alarm code is moved into a common function in itimer.c Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index d9cdba54b78..bf0e785e2e0 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,6 +101,7 @@ extern long do_utimes(int dfd, char __user *filename, struct timeval *times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); +extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday(struct timespec *tv); -- cgit v1.2.3 From 871751e25d956ad24f129ca972b7851feaa61d53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 25 Mar 2006 03:06:39 -0800 Subject: [PATCH] slab: implement /proc/slab_allocators Implement /proc/slab_allocators. It produces output like: idr_layer_cache: 80 idr_pre_get+0x33/0x4e buffer_head: 2555 alloc_buffer_head+0x20/0x75 mm_struct: 9 mm_alloc+0x1e/0x42 mm_struct: 20 dup_mm+0x36/0x370 vm_area_struct: 384 dup_mm+0x18f/0x370 vm_area_struct: 151 do_mmap_pgoff+0x2e0/0x7c3 vm_area_struct: 1 split_vma+0x5a/0x10e vm_area_struct: 11 do_brk+0x206/0x2e2 vm_area_struct: 2 copy_vma+0xda/0x142 vm_area_struct: 9 setup_arg_pages+0x99/0x214 fs_cache: 8 copy_fs_struct+0x21/0x133 fs_cache: 29 copy_process+0xf38/0x10e3 files_cache: 30 alloc_files+0x1b/0xcf signal_cache: 81 copy_process+0xbaa/0x10e3 sighand_cache: 77 copy_process+0xe65/0x10e3 sighand_cache: 1 de_thread+0x4d/0x5f8 anon_vma: 241 anon_vma_prepare+0xd9/0xf3 size-2048: 1 add_sect_attrs+0x5f/0x145 size-2048: 2 journal_init_revoke+0x99/0x302 size-2048: 2 journal_init_revoke+0x137/0x302 size-2048: 2 journal_init_inode+0xf9/0x1c4 Cc: Manfred Spraul Cc: Alexander Nyberg Cc: Pekka Enberg Cc: Christoph Lameter Cc: Ravikiran Thirumalai Signed-off-by: Al Viro DESC slab-leaks3-locking-fix EDESC From: Andrew Morton Update for slab-remove-cachep-spinlock.patch Cc: Al Viro Cc: Manfred Spraul Cc: Alexander Nyberg Cc: Pekka Enberg Cc: Christoph Lameter Cc: Ravikiran Thirumalai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index e2ee5b26879..f88e08a5802 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -77,11 +77,12 @@ struct cache_sizes { }; extern struct cache_sizes malloc_sizes[]; -#ifndef CONFIG_DEBUG_SLAB extern void *__kmalloc(size_t, gfp_t); +#ifndef CONFIG_DEBUG_SLAB +#define ____kmalloc(size, flags) __kmalloc(size, flags) #else extern void *__kmalloc_track_caller(size_t, gfp_t, void*); -#define __kmalloc(size, flags) \ +#define ____kmalloc(size, flags) \ __kmalloc_track_caller(size, flags, __builtin_return_address(0)) #endif @@ -173,6 +174,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #define kmem_ptr_validate(a, b) (0) #define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f) #define kmalloc_node(s, f, n) kmalloc(s, f) +#define ____kmalloc kmalloc #endif /* CONFIG_SLOB */ -- cgit v1.2.3 From a8c0f9a41f88da703ade33f9c1626a55c786e8bb Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 25 Mar 2006 03:06:42 -0800 Subject: [PATCH] slab: introduce kmem_cache_zalloc allocator Introduce a memory-zeroing variant of kmem_cache_alloc. The allocator already exits in XFS and there are potential users for it so this patch makes the allocator available for the general public. Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index f88e08a5802..1216b09e07b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -64,6 +64,7 @@ extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned lo extern int kmem_cache_destroy(kmem_cache_t *); extern int kmem_cache_shrink(kmem_cache_t *); extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); +extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); @@ -156,6 +157,7 @@ struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, void (*)(void *, struct kmem_cache *, unsigned long)); int kmem_cache_destroy(struct kmem_cache *c); void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags); +void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); void kmem_cache_free(struct kmem_cache *c, void *b); const char *kmem_cache_name(struct kmem_cache *); void *kmalloc(size_t size, gfp_t flags); -- cgit v1.2.3 From 40c07ae8daa659b8feb149c84731629386873c16 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 25 Mar 2006 03:06:43 -0800 Subject: [PATCH] slab: optimize constant-size kzalloc calls As suggested by Eric Dumazet, optimize kzalloc() calls that pass a compile-time constant size. Please note that the patch increases kernel text slightly (~200 bytes for defconfig on x86). Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 1216b09e07b..15e1d9736b1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -110,7 +110,30 @@ found: return __kmalloc(size, flags); } -extern void *kzalloc(size_t, gfp_t); +extern void *__kzalloc(size_t, gfp_t); + +static inline void *kzalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size)) { + int i = 0; +#define CACHE(x) \ + if (size <= x) \ + goto found; \ + else \ + i++; +#include "kmalloc_sizes.h" +#undef CACHE + { + extern void __you_cannot_kzalloc_that_much(void); + __you_cannot_kzalloc_that_much(); + } +found: + return kmem_cache_zalloc((flags & GFP_DMA) ? + malloc_sizes[i].cs_dmacachep : + malloc_sizes[i].cs_cachep, flags); + } + return __kzalloc(size, flags); +} /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -161,14 +184,14 @@ void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); void kmem_cache_free(struct kmem_cache *c, void *b); const char *kmem_cache_name(struct kmem_cache *); void *kmalloc(size_t size, gfp_t flags); -void *kzalloc(size_t size, gfp_t flags); +void *__kzalloc(size_t size, gfp_t flags); void kfree(const void *m); unsigned int ksize(const void *m); unsigned int kmem_cache_size(struct kmem_cache *c); static inline void *kcalloc(size_t n, size_t size, gfp_t flags) { - return kzalloc(n * size, flags); + return __kzalloc(n * size, flags); } #define kmem_cache_shrink(d) (0) @@ -176,6 +199,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #define kmem_ptr_validate(a, b) (0) #define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f) #define kmalloc_node(s, f, n) kmalloc(s, f) +#define kzalloc(s, f) __kzalloc(s, f) #define ____kmalloc kmalloc #endif /* CONFIG_SLOB */ -- cgit v1.2.3 From e3df18983ea090a2e00dd5c2c6167bb431a0e0a2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:06:53 -0800 Subject: [PATCH] jbd: embed j_commit_timer in journal struct The kjournald timer is currently on the kernel thread's stack and the journal structure points at it. Save a pointer hop by moving the timer into the journal structure. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 2ccbfb6340b..4fc7dffd66e 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -29,6 +29,8 @@ #include #include #include +#include + #include #endif @@ -787,7 +789,7 @@ struct journal_s unsigned long j_commit_interval; /* The timer used to wakeup the commit thread: */ - struct timer_list *j_commit_timer; + struct timer_list j_commit_timer; /* * The revoke table: maintains the list of revoked blocks in the -- cgit v1.2.3 From ca5734db60630f7c5564a61a5b9034c1bb369c3d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 25 Mar 2006 03:06:55 -0800 Subject: [PATCH] Small cleanup in quota.h Remove unused quota flag. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/quota.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index 8dc2d04a103..2dab71e1c3d 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -209,7 +209,6 @@ extern struct dqstats dqstats; #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_WAITFREE_B 6 /* dquot being waited (by invalidate_dquots) */ struct dquot { struct hlist_node dq_hash; /* Hash list in memory */ -- cgit v1.2.3 From bdfc326614b90e7bc47ee4a8fed05988555f0169 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 25 Mar 2006 03:06:56 -0800 Subject: [PATCH] fs/inode.c: make iprune_mutex static There's no reason for iprune_mutex being global. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 215696a0f16..7c750312261 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1558,7 +1558,6 @@ extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); -extern struct mutex iprune_mutex; extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); -- cgit v1.2.3 From 23f9e0f891c9b159a199629d4426f6ae0c383508 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentzev Date: Sat, 25 Mar 2006 03:06:57 -0800 Subject: [PATCH] reiserfs: fix transaction overflowing This patch fixes a bug in reiserfs truncate. A transaction might overflow when truncating long highly fragmented file. The fix is to split truncation into several transactions to avoid overflowing. Signed-off-by: Vladimir V. Saveliev Cc; Charles McColgan Cc: Alexander Zarochentsev Cc: Hans Reiser Cc: Chris Mason Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reiserfs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index dad78cecfd2..912f1b7cb18 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1704,6 +1704,11 @@ static inline int reiserfs_transaction_running(struct super_block *s) return 0; } +static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) +{ + return th->t_blocks_allocated - th->t_blocks_logged; +} + int reiserfs_async_progress_wait(struct super_block *s); struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct -- cgit v1.2.3 From b500531e6f5f234ed267bd7060ee06d144faf0ca Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sat, 25 Mar 2006 03:07:01 -0800 Subject: [PATCH] Introduce FMODE_EXEC file flag Introduce FMODE_EXEC file flag, to indicate that file is being opened for execution. This is useful for distributed filesystems to maintain consistent behavior for returning ETXTBUSY when opening for write and execution happens on different nodes. akpm: Needed by Lustre at present. I assume their objective to to work towards being able to install Lustre on an unmodified distro kernel, which seems sane. It should have zero runtime cost. Trond and Chuck indicate that NFS4 can probably use this too, for the same thing. Steven says it's also on the GFS todo list. Signed-off-by: Oleg Drokin Cc: Trond Myklebust Cc: Chuck Lever Cc: Steven Whitehouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c750312261..21e8cf795c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -65,6 +65,11 @@ extern int dir_notify_enable; #define FMODE_PREAD 8 #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* File is being opened for execution. Primary users of this flag are + distributed filesystems that can use it to achieve correct ETXTBUSY + behavior for cross-node execution/opening_for_writing of files */ +#define FMODE_EXEC 16 + #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 -- cgit v1.2.3 From 1aef821a6b3aeca8c19d06aee012ed9db617d1e3 Mon Sep 17 00:00:00 2001 From: Thomas Koeller Date: Sat, 25 Mar 2006 03:07:03 -0800 Subject: [PATCH] constify tty flip buffer handling Add a couple of 'const' qualifiers to the TTY flip buffer APIs, where appropriate. Signed-off-by: Thomas Koeller Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tty_flip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index 0c6169fff36..0976a163b45 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -2,8 +2,8 @@ #define _LINUX_TTY_FLIP_H extern int tty_buffer_request_room(struct tty_struct *tty, size_t size); -extern int tty_insert_flip_string(struct tty_struct *tty, unsigned char *chars, size_t size); -extern int tty_insert_flip_string_flags(struct tty_struct *tty, unsigned char *chars, char *flags, size_t size); +extern int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size); +extern int tty_insert_flip_string_flags(struct tty_struct *tty, const unsigned char *chars, const char *flags, size_t size); extern int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size); extern int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size); -- cgit v1.2.3 From 8d3b33f67fdc0fb364a1ef6d8fbbea7c2e4e6c98 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 25 Mar 2006 03:07:05 -0800 Subject: [PATCH] Remove MODULE_PARM MODULE_PARM was actually breaking: recent gcc version optimize them out as unused. It's time to replace the last users, which are generally in the most unloved drivers anyway. Signed-off-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 70bd843c71c..e144309836a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -554,25 +554,6 @@ static inline void module_remove_driver(struct device_driver *driver) /* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */ -struct obsolete_modparm { - char name[64]; - char type[64-sizeof(void *)]; - void *addr; -}; - -static inline void MODULE_PARM_(void) { } -#ifdef MODULE -/* DEPRECATED: Do not use. */ -#define MODULE_PARM(var,type) \ -extern struct obsolete_modparm __parm_##var \ -__attribute__((section("__obsparm"))); \ -struct obsolete_modparm __parm_##var = \ -{ __stringify(var), type, &MODULE_PARM_ }; \ -__MODULE_PARM_TYPE(var, type); -#else -#define MODULE_PARM(var,type) static void __attribute__((__unused__)) *__parm_##var = &MODULE_PARM_; -#endif - #define __MODULE_STRING(x) __stringify(x) /* Use symbol_get and symbol_put instead. You'll thank me. */ -- cgit v1.2.3 From 9871728b756646e0d758a966ba00f2c0ff812817 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 25 Mar 2006 03:07:06 -0800 Subject: [PATCH] kernel/params.c: make param_array() static param_array() in kernel/params.c can now become static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index b5c98c43779..7c0c2c198f1 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -162,13 +162,6 @@ extern int param_array_get(char *buffer, struct kernel_param *kp); extern int param_set_copystring(const char *val, struct kernel_param *kp); extern int param_get_string(char *buffer, struct kernel_param *kp); -int param_array(const char *name, - const char *val, - unsigned int min, unsigned int max, - void *elem, int elemsize, - int (*set)(const char *, struct kernel_param *kp), - int *num); - /* for exporting parameters in /sys/parameters */ struct module; -- cgit v1.2.3 From c32ccd87bfd1414b0aabfcd8dbc7539ad23bcbaa Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 25 Mar 2006 03:07:09 -0800 Subject: [PATCH] inotify: lock avoidance with parent watch status in dentry Previous inotify work avoidance is good when inotify is completely unused, but it breaks down if even a single watch is in place anywhere in the system. Robin Holt notices that udev is one such culprit - it slows down a 512-thread application on a 512 CPU system from 6 seconds to 22 minutes. Solve this by adding a flag in the dentry that tells inotify whether or not its parent inode has a watch on it. Event queueing to parent will skip taking locks if this flag is cleared. Setting and clearing of this flag on all child dentries versus event delivery: this is no in terms of race cases, and that was shown to be equivalent to always performing the check. The essential behaviour is that activity occuring _after_ a watch has been added and _before_ it has been removed, will generate events. Signed-off-by: Nick Piggin Cc: Robert Love Cc: John McCutchan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 2 ++ include/linux/fsnotify.h | 19 +++++++++++++++++++ include/linux/inotify.h | 11 +++++++++++ 3 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4361f378997..d10bd30c337 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -162,6 +162,8 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ + extern spinlock_t dcache_lock; /** diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 03b8e7932b8..f7e517c1f1b 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -16,6 +16,25 @@ #include #include +/* + * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. + */ +static inline void fsnotify_d_instantiate(struct dentry *entry, + struct inode *inode) +{ + inotify_d_instantiate(entry, inode); +} + +/* + * fsnotify_d_move - entry has been moved + * Called with dcache_lock and entry->d_lock held. + */ +static inline void fsnotify_d_move(struct dentry *entry) +{ + inotify_d_move(entry); +} + /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 267c88b5f74..09e00433c78 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -71,6 +71,8 @@ struct inotify_event { #ifdef CONFIG_INOTIFY +extern void inotify_d_instantiate(struct dentry *, struct inode *); +extern void inotify_d_move(struct dentry *); extern void inotify_inode_queue_event(struct inode *, __u32, __u32, const char *); extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, @@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void); #else +static inline void inotify_d_instantiate(struct dentry *dentry, + struct inode *inode) +{ +} + +static inline void inotify_d_move(struct dentry *dentry) +{ +} + static inline void inotify_inode_queue_event(struct inode *inode, __u32 mask, __u32 cookie, const char *filename) -- cgit v1.2.3 From e6a6784627483381d012b507bb0d49809658a1fa Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Sat, 25 Mar 2006 03:07:13 -0800 Subject: [PATCH] parport: move PP_MAJOR from ppdev.h to major.h Today I wondered about /dev/parport after not seeing anything in drivers/parport register char-major-99. Having PP_MAJOR in include/linux/major.h would've allowed me to more quickly determine that it was the ppdev driver driving these. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/major.h | 1 + include/linux/ppdev.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/major.h b/include/linux/major.h index e36a46702d9..0a74c52924c 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -113,6 +113,7 @@ #define UBD_MAJOR 98 +#define PP_MAJOR 99 #define JSFD_MAJOR 99 #define PHONE_MAJOR 100 diff --git a/include/linux/ppdev.h b/include/linux/ppdev.h index 141c9658682..f376a7598a7 100644 --- a/include/linux/ppdev.h +++ b/include/linux/ppdev.h @@ -14,8 +14,6 @@ * Added PPGETMODES/PPGETMODE/PPGETPHASE, Fred Barnes , 03/01/2001 */ -#define PP_MAJOR 99 - #define PP_IOCTL 'p' /* Set mode for read/write (e.g. IEEE1284_MODE_EPP) */ -- cgit v1.2.3 From cd02b966bfcad12d1b2e265dc8dbc331d4c184c4 Mon Sep 17 00:00:00 2001 From: "Vladimir V. Saveliev" Date: Sat, 25 Mar 2006 03:07:15 -0800 Subject: [PATCH] reiserfs: cleanups Clean up several places where gcc issues warnings when -W is specified. Thanks to Neil for finding that. Signed-off-by: Vladimir V. Saveliev Cc: Neil Brown Signed-off-by: Hans Reiser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reiserfs_xattr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index 87280eb6083..5353afb11db 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -101,13 +101,13 @@ static inline void reiserfs_mark_inode_private(struct inode *inode) #else #define is_reiserfs_priv_object(inode) 0 -#define reiserfs_mark_inode_private(inode) +#define reiserfs_mark_inode_private(inode) do {;} while(0) #define reiserfs_getxattr NULL #define reiserfs_setxattr NULL #define reiserfs_listxattr NULL #define reiserfs_removexattr NULL -#define reiserfs_write_lock_xattrs(sb) -#define reiserfs_write_unlock_xattrs(sb) +#define reiserfs_write_lock_xattrs(sb) do {;} while(0) +#define reiserfs_write_unlock_xattrs(sb) do {;} while(0) #define reiserfs_read_lock_xattrs(sb) #define reiserfs_read_unlock_xattrs(sb) -- cgit v1.2.3 From e51c01b08474ea454a965a937fff0407ab6714c7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 25 Mar 2006 03:07:17 -0800 Subject: [PATCH] hp300: fix driver_register() return handling, remove dio_module_init() Remove the assumption that driver_register() returns the number of devices bound to the driver. In fact, it returns zero for success or a negative error value. dio_module_init() used the device count to automatically unregister and unload drivers that found no devices. That might have worked at one time, but has been broken for some time because dio_register_driver() returned either a negative error or a positive count (never zero). So it could only unregister on failure, when it's not needed anyway. This functionality could be resurrected in individual drivers by counting devices in their .probe() methods. Signed-off-by: Bjorn Helgaas Cc: Philip Blundell Cc: Jochen Friedrich Cc: "Antonino A. Daplas" Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dio.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dio.h b/include/linux/dio.h index fae9395fcf4..1e65ebc2a3d 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -276,37 +276,5 @@ static inline void dio_set_drvdata (struct dio_dev *d, void *data) dev_set_drvdata(&d->dev, data); } -/* - * A helper function which helps ensure correct dio_driver - * setup and cleanup for commonly-encountered hotplug/modular cases - * - * This MUST stay in a header, as it checks for -DMODULE - */ -static inline int dio_module_init(struct dio_driver *drv) -{ - int rc = dio_register_driver(drv); - - if (rc > 0) - return 0; - - /* iff CONFIG_HOTPLUG and built into kernel, we should - * leave the driver around for future hotplug events. - * For the module case, a hotplug daemon of some sort - * should load a module in response to an insert event. */ -#if defined(CONFIG_HOTPLUG) && !defined(MODULE) - if (rc == 0) - return 0; -#else - if (rc == 0) - rc = -ENODEV; -#endif - - /* if we get here, we need to clean up DIO driver instance - * and return some sort of error */ - dio_unregister_driver(drv); - - return rc; -} - #endif /* __KERNEL__ */ #endif /* ndef _LINUX_DIO_H */ -- cgit v1.2.3 From 33d8675ea66e79d21da3ed64ce88dfb2a18bc6a7 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sat, 25 Mar 2006 03:07:20 -0800 Subject: [PATCH] amiga: fix driver_register() return handling, remove zorro_module_init() Remove the assumption that driver_register() returns the number of devices bound to the driver. In fact, it returns zero for success or a negative error value. zorro_module_init() used the device count to automatically unregister and unload drivers that found no devices. That might have worked at one time, but has been broken for some time because zorro_register_driver() returned either a negative error or a positive count (never zero). So it could only unregister on failure, when it's not needed anyway. This functionality could be resurrected in individual drivers by counting devices in their .probe() methods. Signed-off-by: Bjorn Helgaas Cc: Geert Uytterhoeven Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zorro.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zorro.h b/include/linux/zorro.h index ba5b72768bb..2f135cf6eef 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -271,39 +271,6 @@ static inline void zorro_set_drvdata (struct zorro_dev *z, void *data) } -/* - * A helper function which helps ensure correct zorro_driver - * setup and cleanup for commonly-encountered hotplug/modular cases - * - * This MUST stay in a header, as it checks for -DMODULE - */ -static inline int zorro_module_init(struct zorro_driver *drv) -{ - int rc = zorro_register_driver(drv); - - if (rc > 0) - return 0; - - /* iff CONFIG_HOTPLUG and built into kernel, we should - * leave the driver around for future hotplug events. - * For the module case, a hotplug daemon of some sort - * should load a module in response to an insert event. */ -#if defined(CONFIG_HOTPLUG) && !defined(MODULE) - if (rc == 0) - return 0; -#else - if (rc == 0) - rc = -ENODEV; -#endif - - /* if we get here, we need to clean up Zorro driver instance - * and return some sort of error */ - zorro_unregister_driver(drv); - - return rc; -} - - /* * Bitmask indicating portions of available Zorro II RAM that are unused * by the system. Every bit represents a 64K chunk, for a maximum of 8MB -- cgit v1.2.3 From c777ac5594f772ac760e02c3ac71d067616b579d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:07:36 -0800 Subject: [PATCH] irq: uninline migration functions Uninline some massive IRQ migration functions. Put them in the new kernel/irq/migration.c. Cc: Andi Kleen Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 49 ++----------------------------------------------- 1 file changed, 2 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 6c5d4c898cc..ee2a82a572f 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -114,53 +114,8 @@ static inline void set_native_irq_info(int irq, cpumask_t mask) #if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) extern cpumask_t pending_irq_cpumask[NR_IRQS]; -static inline void set_pending_irq(unsigned int irq, cpumask_t mask) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - desc->move_irq = 1; - pending_irq_cpumask[irq] = mask; - spin_unlock_irqrestore(&desc->lock, flags); -} - -static inline void -move_native_irq(int irq) -{ - cpumask_t tmp; - irq_desc_t *desc = irq_descp(irq); - - if (likely (!desc->move_irq)) - return; - - desc->move_irq = 0; - - if (likely(cpus_empty(pending_irq_cpumask[irq]))) - return; - - if (!desc->handler->set_affinity) - return; - - /* note - we hold the desc->lock */ - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); - - /* - * If there was a valid mask to work with, please - * do the disable, re-program, enable sequence. - * This is *not* particularly important for level triggered - * but in a edge trigger case, we might be setting rte - * when an active trigger is comming in. This could - * cause some ioapics to mal-function. - * Being paranoid i guess! - */ - if (unlikely(!cpus_empty(tmp))) { - desc->handler->disable(irq); - desc->handler->set_affinity(irq,tmp); - desc->handler->enable(irq); - } - cpus_clear(pending_irq_cpumask[irq]); -} +void set_pending_irq(unsigned int irq, cpumask_t mask); +void move_native_irq(int irq); #ifdef CONFIG_PCI_MSI /* -- cgit v1.2.3 From 77d47582c2345e071df02afaf9191641009287c4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 25 Mar 2006 03:07:39 -0800 Subject: [PATCH] add a proper prototype for setup_arch() This patch adds a proper prototype for setup_arch() in init.h. This patch is based on a patch by Ben Dooks . Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index ff8d8b8632f..ed0ac7c39fd 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -69,6 +69,10 @@ extern initcall_t __security_initcall_start[], __security_initcall_end[]; /* Defined in init/main.c */ extern char saved_command_line[]; + +/* used by init/main.c */ +extern void setup_arch(char **); + #endif #ifndef MODULE -- cgit v1.2.3 From 12b5989be10011387a9da5dee82e5c0d6f9d02e7 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sat, 25 Mar 2006 03:07:41 -0800 Subject: [PATCH] refactor capable() to one implementation, add __capable() helper Move capable() to kernel/capability.c and eliminate duplicate implementations. Add __capable() function which can be used to check for capabiilty of any process. Signed-off-by: Chris Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 3 ++- include/linux/security.h | 22 ++++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 5a23ce75262..6548b35ab9f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -357,7 +357,8 @@ static inline kernel_cap_t cap_invert(kernel_cap_t c) #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) -extern int capable(int cap); +int capable(int cap); +int __capable(struct task_struct *t, int cap); #endif /* __KERNEL__ */ diff --git a/include/linux/security.h b/include/linux/security.h index b18eb8cfa63..3c19be35124 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1040,6 +1040,11 @@ struct swap_info_struct; * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. + * @capable: + * Check whether the @tsk process has the @cap capability. + * @tsk contains the task_struct for the process. + * @cap contains the capability . + * Return 0 if the capability is granted for @tsk. * @acct: * Check permission before enabling or disabling process accounting. If * accounting is being enabled, then @file refers to the open file used to @@ -1053,11 +1058,6 @@ struct swap_info_struct; * @table contains the ctl_table structure for the sysctl variable. * @op contains the operation (001 = search, 002 = write, 004 = read). * Return 0 if permission is granted. - * @capable: - * Check whether the @tsk process has the @cap capability. - * @tsk contains the task_struct for the process. - * @cap contains the capability . - * Return 0 if the capability is granted for @tsk. * @syslog: * Check permission before accessing the kernel message ring or changing * logging to the console. @@ -1099,9 +1099,9 @@ struct security_operations { kernel_cap_t * effective, kernel_cap_t * inheritable, kernel_cap_t * permitted); + int (*capable) (struct task_struct * tsk, int cap); int (*acct) (struct file * file); int (*sysctl) (struct ctl_table * table, int op); - int (*capable) (struct task_struct * tsk, int cap); int (*quotactl) (int cmds, int type, int id, struct super_block * sb); int (*quota_on) (struct dentry * dentry); int (*syslog) (int type); @@ -1347,6 +1347,11 @@ static inline void security_capset_set (struct task_struct *target, security_ops->capset_set (target, effective, inheritable, permitted); } +static inline int security_capable(struct task_struct *tsk, int cap) +{ + return security_ops->capable(tsk, cap); +} + static inline int security_acct (struct file *file) { return security_ops->acct (file); @@ -2050,6 +2055,11 @@ static inline void security_capset_set (struct task_struct *target, cap_capset_set (target, effective, inheritable, permitted); } +static inline int security_capable(struct task_struct *tsk, int cap) +{ + return cap_capable(tsk, cap); +} + static inline int security_acct (struct file *file) { return 0; -- cgit v1.2.3 From 962749af67b145c57917bfbff3c303ebd7d5988c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:08:01 -0800 Subject: [PATCH] roundup_pow_of_two() 64-bit fix fls() takes an integer, so roundup_pow_of_two() is busted for ulongs larger than 2^32-1. Fix this by implementing and using fls_long(). (Why does roundup_pow_of_two() return a long?) (Why is roundup_pow_of_two() __attribute_const__ whereas long_log2() is __attribute_pure__?) (Why does long_log2() suck so much? Because we were missing fls_long()?) Cc: Roland Dreier Cc: "Chen, Kenneth W" Cc: John Hawkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 7 +++++++ include/linux/kernel.h | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 208650b1ad3..f17525a963d 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -175,4 +175,11 @@ static inline __u32 ror32(__u32 word, unsigned int shift) return (word >> shift) | (word << (32 - shift)); } +static inline unsigned fls_long(unsigned long l) +{ + if (sizeof(l) == 4) + return fls(l); + return fls64(l); +} + #endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bb6e7ddee2f..03d6cfaa5b8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -154,9 +154,10 @@ static inline int __attribute_pure__ long_log2(unsigned long x) return r; } -static inline unsigned long __attribute_const__ roundup_pow_of_two(unsigned long x) +static inline unsigned long +__attribute_const__ roundup_pow_of_two(unsigned long x) { - return (1UL << fls(x - 1)); + return 1UL << fls_long(x - 1); } extern int printk_ratelimit(void); -- cgit v1.2.3 From daff89f324755f87a060d5125a205c0755811ea9 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sat, 25 Mar 2006 03:08:05 -0800 Subject: [PATCH] radix-tree documentation cleanups Documentation changes to help radix tree users avoid overrunning the tags array. RADIX_TREE_TAGS moves to linux/radix-tree.h and is now known as RADIX_TREE_MAX_TAGS (Nick Piggin's idea). Tag parameters are changed to unsigned, and some comments are updated. Signed-off-by: Jonathan Corbet Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c57ff2fcb30..dd83cca2800 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -45,6 +45,8 @@ do { \ (root)->rnode = NULL; \ } while (0) +#define RADIX_TREE_MAX_TAGS 2 + int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); @@ -55,15 +57,16 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, int radix_tree_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, - unsigned long index, int tag); + unsigned long index, unsigned int tag); void *radix_tree_tag_clear(struct radix_tree_root *root, - unsigned long index, int tag); + unsigned long index, unsigned int tag); int radix_tree_tag_get(struct radix_tree_root *root, - unsigned long index, int tag); + unsigned long index, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items, int tag); -int radix_tree_tagged(struct radix_tree_root *root, int tag); + unsigned long first_index, unsigned int max_items, + unsigned int tag); +int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); static inline void radix_tree_preload_end(void) { -- cgit v1.2.3 From ccb46000f4bb459777686611157ac0eac928704e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:08:08 -0800 Subject: [PATCH] cpumask: uninline first_cpu() text data bss dec hex filename before: 3490577 1322408 360000 5172985 4eeef9 vmlinux after: 3488027 1322496 360128 5170651 4ee5db vmlinux Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 60e56c6e03d..9b702fd24a7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -212,11 +212,12 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } -#define first_cpu(src) __first_cpu(&(src), NR_CPUS) -static inline int __first_cpu(const cpumask_t *srcp, int nbits) -{ - return min_t(int, nbits, find_first_bit(srcp->bits, nbits)); -} +#ifdef CONFIG_SMP +int __first_cpu(const cpumask_t *srcp); +#define first_cpu(src) __first_cpu(&(src)) +#else +#define first_cpu(src) 0 +#endif #define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS) static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits) -- cgit v1.2.3 From 3d18bd74a22d0bed3bc81fc64c4ba6344a10f155 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:08:09 -0800 Subject: [PATCH] cpumask: uninline next_cpu() text data bss dec hex filename before: 3488027 1322496 360128 5170651 4ee5db vmlinux after: 3485112 1322480 359968 5167560 4ed9c8 vmlinux 2931 bytes saved Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9b702fd24a7..4b29e508a0b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -215,16 +215,13 @@ static inline void __cpus_shift_left(cpumask_t *dstp, #ifdef CONFIG_SMP int __first_cpu(const cpumask_t *srcp); #define first_cpu(src) __first_cpu(&(src)) +int __next_cpu(int n, const cpumask_t *srcp); +#define next_cpu(n, src) __next_cpu((n), &(src)) #else -#define first_cpu(src) 0 +#define first_cpu(src) 0 +#define next_cpu(n, src) 1 #endif -#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS) -static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits) -{ - return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1)); -} - #define cpumask_of_cpu(cpu) \ ({ \ typeof(_unused_cpumask_arg_) m; \ -- cgit v1.2.3 From 8630282070b4a52b12cfa514ba8558e2f3d56360 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:08:09 -0800 Subject: [PATCH] cpumask: uninline highest_possible_processor_id() Shrinks the only caller (net/bridge/netfilter/ebtables.c) by 174 bytes. Also, optimise highest_possible_processor_id() out of existence on CONFIG_SMP=n. Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4b29e508a0b..f770039344c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -396,6 +396,12 @@ extern cpumask_t cpu_present_map; #define cpu_present(cpu) ((cpu) == 0) #endif +#ifdef CONFIG_SMP +int highest_possible_processor_id(void); +#else +#define highest_possible_processor_id() 0 +#endif + #define any_online_cpu(mask) \ ({ \ int cpu; \ @@ -409,14 +415,5 @@ extern cpumask_t cpu_present_map; #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) -/* Find the highest possible smp_processor_id() */ -#define highest_possible_processor_id() \ -({ \ - unsigned int cpu, highest = 0; \ - for_each_cpu_mask(cpu, cpu_possible_map) \ - highest = cpu; \ - highest; \ -}) - #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From 96a9b4d31eba4722ba7aad2cc15118a7799f499f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 03:08:10 -0800 Subject: [PATCH] cpumask: uninline any_online_cpu() text data bss dec hex filename before: 3605597 1363528 363328 5332453 515de5 vmlinux after: 3605295 1363612 363200 5332107 515c8b vmlinux 218 bytes saved. Also, optimise any_online_cpu() out of existence on CONFIG_SMP=n. This function seems inefficient. Can't we simply AND the two masks, then use find_first_bit()? Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f770039344c..99e6115d8e5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -398,22 +398,15 @@ extern cpumask_t cpu_present_map; #ifdef CONFIG_SMP int highest_possible_processor_id(void); +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +int __any_online_cpu(const cpumask_t *mask); #else #define highest_possible_processor_id() 0 +#define any_online_cpu(mask) 0 #endif -#define any_online_cpu(mask) \ -({ \ - int cpu; \ - for_each_cpu_mask(cpu, (mask)) \ - if (cpu_online(cpu)) \ - break; \ - cpu; \ -}) - #define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) - #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.3 From 34f361ade2fb4a869f6a7714d01c04ce4cfa75d9 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Mar 2006 03:08:18 -0800 Subject: [PATCH] Check if cpu can be onlined before calling smp_prepare_cpu() - Moved check for online cpu out of smp_prepare_cpu() - Moved default declaration of smp_prepare_cpu() to kernel/cpu.c - Removed lock_cpu_hotplug() from smp_prepare_cpu() to around it, since its called from cpu_up() as well now. - Removed clearing from cpu_present_map during cpu_offline as it breaks using cpu_up() directly during a subsequent online operation. Signed-off-by: Ashok Raj Cc: Srivatsa Vaddagiri Cc: "Li, Shaohua" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d612b89dce3..08d50c53aab 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -74,7 +74,6 @@ extern int lock_cpu_hotplug_interruptible(void); register_cpu_notifier(&fn##_nb); \ } int cpu_down(unsigned int cpu); -extern int __attribute__((weak)) smp_prepare_cpu(int cpu); #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #else #define lock_cpu_hotplug() do { } while (0) -- cgit v1.2.3 From d12ddde2bbf46b34eae3fb3fd36c0e42832b537c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 25 Mar 2006 03:08:21 -0800 Subject: [PATCH] udf: remove duplicate definitions This patch removes duplicate definitions from include/linux/udf_fs_i.h which are already defined in fs/udf/ecma_167.h. Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/udf_fs_i.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udf_fs_i.h b/include/linux/udf_fs_i.h index 1e7508420fc..ffaf05679ff 100644 --- a/include/linux/udf_fs_i.h +++ b/include/linux/udf_fs_i.h @@ -15,27 +15,6 @@ #ifdef __KERNEL__ -#ifndef _ECMA_167_H -typedef struct -{ - __u32 logicalBlockNum; - __u16 partitionReferenceNum; -} __attribute__ ((packed)) lb_addr; - -typedef struct -{ - __u32 extLength; - __u32 extPosition; -} __attribute__ ((packed)) short_ad; - -typedef struct -{ - __u32 extLength; - lb_addr extLocation; - __u8 impUse[6]; -} __attribute__ ((packed)) long_ad; -#endif - struct udf_inode_info { struct timespec i_crtime; -- cgit v1.2.3 From 5ddcfa878d5b10b0ab94251a4229a8a9daaf93ed Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sat, 25 Mar 2006 03:08:28 -0800 Subject: [PATCH] remove pps support This removes the support for pps. It's completely unused within the kernel and is basically in the way for further cleanups. It should be easier to readd proper support for it after the rest has been converted to NTP4 (where the pps mechanisms are quite different from NTP3 anyway). Signed-off-by: Roman Zippel Cc: Adrian Bunk Cc: john stultz Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timex.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index b7ca1204e42..82dc9ae79d3 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -97,37 +97,10 @@ #define MAXPHASE 512000L /* max phase error (us) */ #define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */ -#define MAXTIME (200L << PPS_AVG) /* max PPS error (jitter) (200 us) */ #define MINSEC 16L /* min interval between updates (s) */ #define MAXSEC 1200L /* max interval between updates (s) */ #define NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */ -/* - * The following defines are used only if a pulse-per-second (PPS) - * signal is available and connected via a modem control lead, such as - * produced by the optional ppsclock feature incorporated in the Sun - * asynch driver. They establish the design parameters of the frequency- - * lock loop used to discipline the CPU clock oscillator to the PPS - * signal. - * - * PPS_AVG is the averaging factor for the frequency loop, as well as - * the time and frequency dispersion. - * - * PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum - * calibration intervals, respectively, in seconds as a power of two. - * - * PPS_VALID is the maximum interval before the PPS signal is considered - * invalid and protocol updates used directly instead. - * - * MAXGLITCH is the maximum interval before a time offset of more than - * MAXTIME is believed. - */ -#define PPS_AVG 2 /* pps averaging constant (shift) */ -#define PPS_SHIFT 2 /* min interval duration (s) (shift) */ -#define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */ -#define PPS_VALID 120 /* pps signal watchdog max (s) */ -#define MAXGLITCH 30 /* pps signal glitch max (s) */ - /* * syscall interface - used (mainly by NTP daemon) * to discipline kernel clock oscillator @@ -246,20 +219,6 @@ extern long time_reftime; /* time at last adjustment (s) */ extern long time_adjust; /* The amount of adjtime left */ extern long time_next_adjust; /* Value for time_adjust at next tick */ -/* interface variables pps->timer interrupt */ -extern long pps_offset; /* pps time offset (us) */ -extern long pps_jitter; /* time dispersion (jitter) (us) */ -extern long pps_freq; /* frequency offset (scaled ppm) */ -extern long pps_stabil; /* frequency dispersion (scaled ppm) */ -extern long pps_valid; /* pps signal watchdog counter */ - -/* interface variables pps->adjtimex */ -extern int pps_shift; /* interval duration (s) (shift) */ -extern long pps_jitcnt; /* jitter limit exceeded */ -extern long pps_calcnt; /* calibration intervals */ -extern long pps_errcnt; /* calibration errors */ -extern long pps_stbcnt; /* stability limit exceeded */ - /** * ntp_clear - Clears the NTP state variables * -- cgit v1.2.3 From f083a329e63d471a5e9238e837772b1b76c218db Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:19 +0100 Subject: [PATCH] x86_64: Clean up and tweak ACPI blacklist year code - Move the core parser into dmi_scan.c. It can be useful for other subsystems too. - Differentiate between field doesn't exist and field is 0 or unparseable. The first case is likely an old BIOS with broken ACPI, the later is likely a slightly buggy BIOS where someone forget to edit the date. Don't blacklist in the later case. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 2e6bbe01415..64fd6c36660 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -68,6 +68,7 @@ extern char * dmi_get_system_info(int field); extern struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from); extern void dmi_scan_machine(void); +extern int dmi_get_year(int field); #else @@ -75,6 +76,7 @@ static inline int dmi_check_system(struct dmi_system_id *list) { return 0; } static inline char * dmi_get_system_info(int field) { return NULL; } static inline struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from) { return NULL; } +static inline int dmi_get_year(int year) { return 0; } #endif -- cgit v1.2.3 From 267b48014a5c0c2ae90b04dad5d95ceb903365a6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:10 +0100 Subject: [PATCH] x86_64: Try to allocate node memmap near the end of node This fixes problems with very large nodes (over 128GB) filling up all of the first 4GB with their mem_map and not leaving enough space for the swiotlb. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 993da8cc970..7155452fb4a 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -51,6 +51,9 @@ extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, unsigned long align, unsigned long goal, + unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ -- cgit v1.2.3 From 686f8c5d77149f78ff6090dde774b2e43a7319b2 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Sat, 25 Mar 2006 18:15:24 +0000 Subject: include/linux/clk.h is betraying its ARM origins include/linux/clk.h is betraying its ARM origins. Signed-off-by: Todd Poynor Signed-off-by: Russell King --- include/linux/clk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index 12848f81bb3..5ca8c6fddb5 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -8,8 +8,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#ifndef ASMARM_CLOCK_H -#define ASMARM_CLOCK_H +#ifndef __LINUX_CLK_H +#define __LINUX_CLK_H struct device; -- cgit v1.2.3 From 104c7b03ea0913a24be103db66d8cf1f1f99a49a Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 25 Mar 2006 23:03:13 +0000 Subject: [ARM] 3383/3: ixp2000: ixdp2x01 platform serial conversion Patch from Lennert Buytenhek Add a PLAT8250_DEV_PLATFORM2, and convert the two ixdp2x01 CPLD serial ports to use platform serial devices with ids PLAT8250_DEV_PLATFORM[12]. (The on-chip xscale UART is PLAT8250_DEV_PLATFORM, id #0.) Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- include/linux/serial_8250.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 73b464f0926..8e968141372 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -37,6 +37,7 @@ enum { PLAT8250_DEV_LEGACY = -1, PLAT8250_DEV_PLATFORM, PLAT8250_DEV_PLATFORM1, + PLAT8250_DEV_PLATFORM2, PLAT8250_DEV_FOURPORT, PLAT8250_DEV_ACCENT, PLAT8250_DEV_BOCA, -- cgit v1.2.3 From 64a07bd82ed526d813b64b0957543eef55bdf9c0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 26 Mar 2006 01:36:55 -0800 Subject: [PATCH] protect remove_proc_entry It has been discovered that the remove_proc_entry has a race in the removing of entries in the proc file system that are siblings. There's no protection around the traversing and removing of elements that belong in the same subdirectory. This subdirectory list is protected in other areas by the BKL. So the BKL was at first used to protect this area too, but unfortunately, remove_proc_entry may be called with spinlocks held. The BKL may schedule, so this was not a solution. The final solution was to add a new global spin lock to protect this list, called proc_subdir_lock. This lock now protects the list in remove_proc_entry, and I also went around looking for other areas that this list is modified and added this protection there too. Care must be taken since these locations call several functions that may also schedule. Since I don't see any location that these functions that modify the subdirectory list are called by interrupts, the irqsave/restore versions of the spin lock was _not_ used. Signed-off-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index aa6322d4519..6b12b0f661b 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -92,6 +93,8 @@ extern struct proc_dir_entry *proc_bus; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; +extern spinlock_t proc_subdir_lock; + extern void proc_root_init(void); extern void proc_misc_init(void); -- cgit v1.2.3 From 03beb07664d768db97bf454ae5c9581cd4737bb4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 26 Mar 2006 01:36:57 -0800 Subject: [PATCH] Add API for flushing Anon pages Currently, get_user_pages() returns fully coherent pages to the kernel for anything other than anonymous pages. This is a problem for things like fuse and the SCSI generic ioctl SG_IO which can potentially wish to do DMA to anonymous pages passed in by users. The fix is to add a new memory management API: flush_anon_page() which is used in get_user_pages() to make anonymous pages coherent. Signed-off-by: James Bottomley Cc: Russell King Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6bece9280eb..7bd2593dbef 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -7,6 +7,12 @@ #include +#ifndef ARCH_HAS_FLUSH_ANON_PAGE +static inline void flush_anon_page(struct page *page, unsigned long vmaddr) +{ +} +#endif + #ifdef CONFIG_HIGHMEM #include -- cgit v1.2.3 From 5a3a5a98b6422d05c39eaa32c8b3f83840c7b768 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 26 Mar 2006 01:36:59 -0800 Subject: [PATCH] Add flush_kernel_dcache_page() API We have a problem in a lot of emulated storage in that it takes a page from get_user_pages() and does something like kmap_atomic(page) modify page kunmap_atomic(page) However, nothing has flushed the kernel cache view of the page before the kunmap. We need a lightweight API to do this, so this new API would specifically be for flushing the kernel cache view of a user page which the kernel has modified. The driver would need to add flush_kernel_dcache_page(page) before the final kunmap. Signed-off-by: James Bottomley Cc: Russell King Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7bd2593dbef..892c4ea1b42 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -13,6 +13,12 @@ static inline void flush_anon_page(struct page *page, unsigned long vmaddr) } #endif +#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +static inline void flush_kernel_dcache_page(struct page *page) +{ +} +#endif + #ifdef CONFIG_HIGHMEM #include -- cgit v1.2.3 From 136939a2b5aa4302281215745ccd567e1df2e8d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sun, 26 Mar 2006 01:37:05 -0800 Subject: [PATCH] EFI, /dev/mem: simplify efi_mem_attribute_range() Pass the size, not a pointer to the size, to efi_mem_attribute_range(). This function validates memory regions for the /dev/mem read/write/mmap paths. The pointer allows arches to reduce the size of the range, but I think that's unnecessary complexity. Simplifying it will let me use efi_mem_attribute_range() to improve the ia64 ioremap() implementation. Signed-off-by: Bjorn Helgaas Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: "Brown, Len" Cc: Andi Kleen Acked-by: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c7c5dd31618..d15725470aa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -292,6 +292,8 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); +extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, + u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); -- cgit v1.2.3 From b2c99e3c70d77fb194df5aa1642030080d28ea48 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sun, 26 Mar 2006 01:37:08 -0800 Subject: [PATCH] EFI: keep physical table addresses in efi structure Almost all users of the table addresses from the EFI system table want physical addresses. So rather than doing the pa->va->pa conversion, just keep physical addresses in struct efi. This fixes a DMI bug: the efi structure contained the physical SMBIOS address on x86 but the virtual address on ia64, so dmi_scan_machine() used ioremap() on a virtual address on ia64. This is essentially the same as an earlier patch by Matt Tolentino: http://marc.theaimsgroup.com/?l=linux-kernel&m=112130292316281&w=2 except that this changes all table addresses, not just ACPI addresses. Matt's original patch was backed out because it caused MCAs on HP sx1000 systems. That problem is resolved by the ioremap() attribute checking added for ia64. Signed-off-by: Bjorn Helgaas Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: "Brown, Len" Cc: Andi Kleen Acked-by: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/efi.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d15725470aa..e203613d3ae 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -240,19 +240,21 @@ struct efi_memory_map { unsigned long desc_size; }; +#define EFI_INVALID_TABLE_ADDR (~0UL) + /* * All runtime access to EFI goes through this structure: */ extern struct efi { efi_system_table_t *systab; /* EFI system table */ - void *mps; /* MPS table */ - void *acpi; /* ACPI table (IA64 ext 0.71) */ - void *acpi20; /* ACPI table (ACPI 2.0) */ - void *smbios; /* SM BIOS table */ - void *sal_systab; /* SAL system table */ - void *boot_info; /* boot info table */ - void *hcdp; /* HCDP table */ - void *uga; /* UGA table */ + unsigned long mps; /* MPS table */ + unsigned long acpi; /* ACPI table (IA64 ext 0.71) */ + unsigned long acpi20; /* ACPI table (ACPI 2.0) */ + unsigned long smbios; /* SM BIOS table */ + unsigned long sal_systab; /* SAL system table */ + unsigned long boot_info; /* boot info table */ + unsigned long hcdp; /* HCDP table */ + unsigned long uga; /* UGA table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From 3978d7179d3849848df8a37dd0a5acc20bcb8750 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 26 Mar 2006 01:37:17 -0800 Subject: [PATCH] Make address_space_operations->sync_page return void The only user ignores the return value, and the only instanace (block_sync_page) always returns 0... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9f159baf153..27dd97b3fce 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -200,7 +200,7 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int block_sync_page(struct page *); +void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5adf32b90f3..972435d4df5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -350,7 +350,7 @@ struct writeback_control; struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - int (*sync_page)(struct page *); + void (*sync_page)(struct page *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); -- cgit v1.2.3 From 2ff28e22bdb8727fbc7d7889807bc5a73aae56c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 26 Mar 2006 01:37:18 -0800 Subject: [PATCH] Make address_space_operations->invalidatepage return void The return value of this function is never used, so let's be honest and declare it as void. Some places where invalidatepage returned 0, I have inserted comments suggesting a BUG_ON. [akpm@osdl.org: JBD BUG fix] [akpm@osdl.org: rework for git-nfs] [akpm@osdl.org: don't go BUG in block_invalidate_page()] Signed-off-by: Neil Brown Acked-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- include/linux/jbd.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 27dd97b3fce..da917ed096a 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -189,8 +189,8 @@ extern int buffer_heads_over_limit; * address_spaces. */ int try_to_release_page(struct page * page, gfp_t gfp_mask); -int block_invalidatepage(struct page *page, unsigned long offset); -int do_invalidatepage(struct page *page, unsigned long offset); +void block_invalidatepage(struct page *page, unsigned long offset); +void do_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); diff --git a/include/linux/fs.h b/include/linux/fs.h index 972435d4df5..9674679525f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -369,7 +369,7 @@ struct address_space_operations { int (*commit_write)(struct file *, struct page *, unsigned, unsigned); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); - int (*invalidatepage) (struct page *, unsigned long); + void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 4fc7dffd66e..6a425e370cb 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -895,7 +895,7 @@ extern int journal_dirty_metadata (handle_t *, struct buffer_head *); extern void journal_release_buffer (handle_t *, struct buffer_head *); extern int journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern int journal_invalidatepage(journal_t *, +extern void journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int journal_stop(handle_t *); -- cgit v1.2.3 From 3c30b06df404c8892c225a99ecfd3f02789c0513 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sun, 26 Mar 2006 01:37:19 -0800 Subject: [PATCH] cleanup smp_call_function UP build net/core/flow.c: In function 'flow_cache_flush': net/core/flow.c:299: warning: statement with no effect Signed-off-by: Con Kolivas Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index d699a16b0cb..e2fa3ab4afc 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -82,7 +82,11 @@ void smp_prepare_boot_cpu(void); */ #define raw_smp_processor_id() 0 #define hard_smp_processor_id() 0 -#define smp_call_function(func,info,retry,wait) ({ 0; }) +static inline int up_smp_call_function(void) +{ + return 0; +} +#define smp_call_function(func,info,retry,wait) (up_smp_call_function()) #define on_each_cpu(func,info,retry,wait) \ ({ \ local_irq_disable(); \ -- cgit v1.2.3 From 50c812b2b9513e3df34eae8c30cb2c221b79b2cb Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sun, 26 Mar 2006 01:37:21 -0800 Subject: [PATCH] ipmi: add full sysfs support Add full driver model support for the IPMI driver. It links in the proper bus and device support. It adds an "ipmi" driver interface that has each BMC discovered by the driver (as a device). These BMCs appear in the devices/platform directory. If there are multiple interfaces to the same BMC, the driver should discover this and will only have one BMC entry. The BMC entry will have pointers to each interface device that connects to it. The device information (statistics and config information) has not yet been ported over to the driver model from proc, that will come later. This work was based on work by Yani Ioannou. I basically rewrote it using that code as a guide, but he still deserves credit :). [bunk@stusta.de: make ipmi_find_bmc_guid() static] Signed-off-by: Corey Minyard Signed-off-by: Yani Ioannou Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 3 ++- include/linux/ipmi_msgdefs.h | 1 + include/linux/ipmi_smi.h | 47 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 48 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index d6276e60b3b..0a84b56935c 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -36,6 +36,7 @@ #include #include +#include /* * This file describes an interface to an IPMI driver. You have to @@ -397,7 +398,7 @@ struct ipmi_smi_watcher the watcher list. So you can add and remove users from the IPMI interface, send messages, etc., but you cannot add or remove SMI watchers or SMI interfaces. */ - void (*new_smi)(int if_num); + void (*new_smi)(int if_num, struct device *dev); void (*smi_gone)(int if_num); }; diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index 03bc64dc2ec..22f5e2afda4 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -47,6 +47,7 @@ #define IPMI_NETFN_APP_RESPONSE 0x07 #define IPMI_GET_DEVICE_ID_CMD 0x01 #define IPMI_CLEAR_MSG_FLAGS_CMD 0x30 +#define IPMI_GET_DEVICE_GUID_CMD 0x08 #define IPMI_GET_MSG_FLAGS_CMD 0x31 #define IPMI_SEND_MSG_CMD 0x34 #define IPMI_GET_MSG_CMD 0x33 diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index e36ee157ad6..53571288a9f 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -37,6 +37,9 @@ #include #include #include +#include +#include +#include /* This files describes the interface for IPMI system management interface drivers to bind into the IPMI message handler. */ @@ -113,12 +116,52 @@ struct ipmi_smi_handlers void (*dec_usecount)(void *send_info); }; +struct ipmi_device_id { + unsigned char device_id; + unsigned char device_revision; + unsigned char firmware_revision_1; + unsigned char firmware_revision_2; + unsigned char ipmi_version; + unsigned char additional_device_support; + unsigned int manufacturer_id; + unsigned int product_id; + unsigned char aux_firmware_revision[4]; + unsigned int aux_firmware_revision_set : 1; +}; + +#define ipmi_version_major(v) ((v)->ipmi_version & 0xf) +#define ipmi_version_minor(v) ((v)->ipmi_version >> 4) + +/* Take a pointer to a raw data buffer and a length and extract device + id information from it. The first byte of data must point to the + byte from the get device id response after the completion code. + The caller is responsible for making sure the length is at least + 11 and the command completed without error. */ +static inline void ipmi_demangle_device_id(unsigned char *data, + unsigned int data_len, + struct ipmi_device_id *id) +{ + id->device_id = data[0]; + id->device_revision = data[1]; + id->firmware_revision_1 = data[2]; + id->firmware_revision_2 = data[3]; + id->ipmi_version = data[4]; + id->additional_device_support = data[5]; + id->manufacturer_id = data[6] | (data[7] << 8) | (data[8] << 16); + id->product_id = data[9] | (data[10] << 8); + if (data_len >= 15) { + memcpy(id->aux_firmware_revision, data+11, 4); + id->aux_firmware_revision_set = 1; + } else + id->aux_firmware_revision_set = 0; +} + /* Add a low-level interface to the IPMI driver. Note that if the interface doesn't know its slave address, it should pass in zero. */ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, - unsigned char version_major, - unsigned char version_minor, + struct ipmi_device_id *device_id, + struct device *dev, unsigned char slave_addr, ipmi_smi_t *intf); -- cgit v1.2.3 From 878a9f30d7b13015f3aa4534d7877d985f150183 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 26 Mar 2006 01:37:23 -0800 Subject: [PATCH] hpet header sanitization Add __KERNEL__ block. Use __KERNEL__ to allow ioctl interface to be usable. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hpet.h | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 27238194b21..707f7cb9e79 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -3,6 +3,8 @@ #include +#ifdef __KERNEL__ + /* * Offsets into HPET Registers */ @@ -85,22 +87,6 @@ struct hpet { #define Tn_FSB_INT_ADDR_SHIFT (32UL) #define Tn_FSB_INT_VAL_MASK (0x00000000ffffffffULL) -struct hpet_info { - unsigned long hi_ireqfreq; /* Hz */ - unsigned long hi_flags; /* information */ - unsigned short hi_hpet; - unsigned short hi_timer; -}; - -#define HPET_INFO_PERIODIC 0x0001 /* timer is periodic */ - -#define HPET_IE_ON _IO('h', 0x01) /* interrupt on */ -#define HPET_IE_OFF _IO('h', 0x02) /* interrupt off */ -#define HPET_INFO _IOR('h', 0x03, struct hpet_info) -#define HPET_EPI _IO('h', 0x04) /* enable periodic */ -#define HPET_DPI _IO('h', 0x05) /* disable periodic */ -#define HPET_IRQFREQ _IOW('h', 0x6, unsigned long) /* IRQFREQ usec */ - /* * exported interfaces */ @@ -133,4 +119,22 @@ int hpet_register(struct hpet_task *, int); int hpet_unregister(struct hpet_task *); int hpet_control(struct hpet_task *, unsigned int, unsigned long); +#endif /* __KERNEL__ */ + +struct hpet_info { + unsigned long hi_ireqfreq; /* Hz */ + unsigned long hi_flags; /* information */ + unsigned short hi_hpet; + unsigned short hi_timer; +}; + +#define HPET_INFO_PERIODIC 0x0001 /* timer is periodic */ + +#define HPET_IE_ON _IO('h', 0x01) /* interrupt on */ +#define HPET_IE_OFF _IO('h', 0x02) /* interrupt off */ +#define HPET_INFO _IOR('h', 0x03, struct hpet_info) +#define HPET_EPI _IO('h', 0x04) /* enable periodic */ +#define HPET_DPI _IO('h', 0x05) /* disable periodic */ +#define HPET_IRQFREQ _IOW('h', 0x6, unsigned long) /* IRQFREQ usec */ + #endif /* !__HPET__ */ -- cgit v1.2.3 From 5842add2f3b519111b6401f3a35862bd00a3aa7e Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sun, 26 Mar 2006 01:37:26 -0800 Subject: [PATCH] VFS,fs/locks.c,NFSD4: add race_free posix_lock_file_conf() interface Lockd and the NFSv4 server both exercise a race condition where posix_test_lock() is called either before or after posix_lock_file() to deal with a denied lock request due to a conflicting lock. Remove the race condition for the NFSv4 server by adding a new conflicting lock parameter to __posix_lock_file() , changing the name to __posix_lock_file_conf(). Keep posix_lock_file() interface, add posix_lock_conf() interface, both call __posix_lock_file_conf(). [akpm@osdl.org: Put the EXPORT_SYMBOL() where it belongs] Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9674679525f..ab67181a5a5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -763,6 +763,7 @@ extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_file_conf(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From 88959ea968709c35e8b979ac9f5a398fa748091a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 26 Mar 2006 01:37:27 -0800 Subject: [PATCH] create struct compat_timex and use it everywhere We had a copy of the compatibility version of struct timex in each 64 bit architecture. This patch just creates a global one and replaces all the usages of the old ones. Signed-off-by: Stephen Rothwell Cc: Arnd Bergmann Acked-by: Kyle McMartin Acked-by: Tony Luck Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index c9ab2a26348..859f95700d3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -45,6 +45,32 @@ struct compat_tms { compat_clock_t tms_cstime; }; +struct compat_timex { + compat_uint_t modes; + compat_long_t offset; + compat_long_t freq; + compat_long_t maxerror; + compat_long_t esterror; + compat_int_t status; + compat_long_t constant; + compat_long_t precision; + compat_long_t tolerance; + struct compat_timeval time; + compat_long_t tick; + compat_long_t ppsfreq; + compat_long_t jitter; + compat_int_t shift; + compat_long_t stabil; + compat_long_t jitcnt; + compat_long_t calcnt; + compat_long_t errcnt; + compat_long_t stbcnt; + + compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; + compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; + compat_int_t :32; compat_int_t :32; compat_int_t :32; compat_int_t :32; +}; + #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) typedef struct { -- cgit v1.2.3 From 3158e9411a66fb98d495ac441c242264f31aaf3e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 26 Mar 2006 01:37:29 -0800 Subject: [PATCH] consolidate sys32/compat_adjtimex Create compat_sys_adjtimex and use it an all appropriate places. Signed-off-by: Stephen Rothwell Cc: Arnd Bergmann Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 2 ++ include/linux/timex.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 859f95700d3..24d659cdbaf 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -207,5 +207,7 @@ static inline int compat_timespec_compare(struct compat_timespec *lhs, return lhs->tv_nsec - rhs->tv_nsec; } +asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/timex.h b/include/linux/timex.h index 82dc9ae79d3..03914b7e41b 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -307,6 +307,8 @@ time_interpolator_reset(void) /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ extern u64 current_tick_length(void); +extern int do_adjtimex(struct timex *); + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ -- cgit v1.2.3 From 22e6c1b39c648850438decd491f62d311800c7db Mon Sep 17 00:00:00 2001 From: Maneesh Soni Date: Sun, 26 Mar 2006 01:37:29 -0800 Subject: [PATCH] Use loff_t for size in struct proc_dir_entry Change proc_dir_entry->size to be loff_t to represent files like /proc/vmcore for 32bit systems with more than 4G memory. Needed for seeing correct size for /proc/vmcore for 32-bit systems with > 4G RAM. Signed-off-by: Maneesh Soni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 6b12b0f661b..cb224cf653b 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -56,7 +56,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; - unsigned long size; + loff_t size; struct inode_operations * proc_iops; struct file_operations * proc_fops; get_info_t *get_info; -- cgit v1.2.3 From 6e0678f394c7bd21bfa5d252b071a09e10e7a749 Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:44 -0800 Subject: [PATCH] mempool: add page allocator This will be used by the next patch in the series to replace duplicate mempool-backed page allocators in 2 places in the kernel. It is also likely that there will be more users in the future. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempool.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index f2427d7394b..9787572e0ae 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -38,4 +38,16 @@ extern void mempool_free(void *element, mempool_t *pool); void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); +/* + * A mempool_alloc_t and mempool_free_t for a simple page allocator that + * allocates pages of the order specified by pool_data + */ +void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); +void mempool_free_pages(void *element, void *pool_data); +static inline mempool_t *mempool_create_page_pool(int min_nr, int order) +{ + return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, + (void *)(long)order); +} + #endif /* _LINUX_MEMPOOL_H */ -- cgit v1.2.3 From 53184082b070dfb077218828fdf839826102ed96 Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:46 -0800 Subject: [PATCH] mempool: add kmalloc allocator Add another allocator to the common mempool code: a kmalloc/kfree allocator This will be used by the next patch in the series to replace duplicate mempool-backed kmalloc allocators in several places in the kernel. It is also very likely that there will be more users in the future. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempool.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 9787572e0ae..d23dbf076b5 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -38,6 +38,18 @@ extern void mempool_free(void *element, mempool_t *pool); void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); +/* + * A mempool_alloc_t and mempool_free_t to kmalloc the amount of memory + * specified by pool_data + */ +void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); +void mempool_kfree(void *element, void *pool_data); +static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) +{ + return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, + (void *) size); +} + /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that * allocates pages of the order specified by pool_data -- cgit v1.2.3 From f183323d3822dee4d7b3147a59b6e8987fe201e0 Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:48 -0800 Subject: [PATCH] mempool: add kzalloc allocator Add another allocator to the common mempool code: a kzalloc/kfree allocator This will be used by the next patch in the series to replace a mempool-backed kzalloc allocator. It is also very likely that there will be more users in the future. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempool.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index d23dbf076b5..41570ce353e 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -39,16 +39,22 @@ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); /* - * A mempool_alloc_t and mempool_free_t to kmalloc the amount of memory - * specified by pool_data + * 2 mempool_alloc_t's and a mempool_free_t to kmalloc/kzalloc and kfree + * the amount of memory specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); +void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) { return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, (void *) size); } +static inline mempool_t *mempool_create_kzalloc_pool(int min_nr, size_t size) +{ + return mempool_create(min_nr, mempool_kzalloc, mempool_kfree, + (void *) size); +} /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that -- cgit v1.2.3 From fec433aaaae32a02329ad7d71b0f3c91b7525077 Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:49 -0800 Subject: [PATCH] mempool: add mempool_create_slab_pool() Create a simple wrapper function for the common case of creating a slab-based mempool. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempool.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 41570ce353e..9be484d1128 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -6,6 +6,8 @@ #include +struct kmem_cache; + typedef void * (mempool_alloc_t)(gfp_t gfp_mask, void *pool_data); typedef void (mempool_free_t)(void *element, void *pool_data); @@ -37,6 +39,12 @@ extern void mempool_free(void *element, mempool_t *pool); */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); +static inline mempool_t * +mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) +{ + return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, + (void *) kc); +} /* * 2 mempool_alloc_t's and a mempool_free_t to kmalloc/kzalloc and kfree -- cgit v1.2.3 From 93d2341c750cda0df48a6cc67b35fe25f1ec47df Mon Sep 17 00:00:00 2001 From: Matthew Dobson Date: Sun, 26 Mar 2006 01:37:50 -0800 Subject: [PATCH] mempool: use mempool_create_slab_pool() Modify well over a dozen mempool users to call mempool_create_slab_pool() rather than calling mempool_create() with extra arguments, saving about 30 lines of code and increasing readability. Signed-off-by: Matthew Dobson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2o.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 5a9d8c59917..dd7d627bf66 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -950,9 +950,7 @@ static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, if (!pool->slab) goto free_name; - pool->mempool = - mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, - pool->slab); + pool->mempool = mempool_create_slab_pool(min_nr, pool->slab); if (!pool->mempool) goto free_slab; -- cgit v1.2.3 From abcb6c9fd13fc2ad7757b818924dc8109a0e3775 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Sun, 26 Mar 2006 01:37:51 -0800 Subject: [PATCH] 2TB files: st_blocks is invalid when calling stat64 This patch series fixes the following problems on 32 bits architecture. o stat64 returns the lower 32 bits of blocks, although userland st_blocks has 64 bits, because i_blocks has only 32 bits. The ioctl with FIOQSIZE has the same problem. o As Dave Kleikamp said, making >2TB file on JFS results in writing an invalid block number to disk inode. The cause is the same as above too. o In generic quota code dquot_transfer(), the file usage is calculated from i_blocks via inode_get_bytes(). If the file is over 2TB, the change of usage is less than expected. The cause is the same as above too. o As Trond Myklebust said, statfs64's entries related to blocks are invalid on statfs64 for a network filesystem which has more than 2^32-1 blocks with CONFIG_LBD disabled. [PATCH 3/3] We made patches to fix problems that occur when handling a large filesystem and a large file. It was discussed on the mails titled "stat64 for over 2TB file returned invalid st_blocks". Signed-off-by: Takashi Sato Cc: Dave Kleikamp Cc: Jan Kara Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/stat.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ab67181a5a5..64b0ca4f14e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -490,7 +490,7 @@ struct inode { unsigned int i_blkbits; unsigned long i_blksize; unsigned long i_version; - unsigned long i_blocks; + sector_t i_blocks; unsigned short i_bytes; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ struct mutex i_mutex; diff --git a/include/linux/stat.h b/include/linux/stat.h index 8ff2a122dfe..8669291352d 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -69,7 +69,7 @@ struct kstat { struct timespec mtime; struct timespec ctime; unsigned long blksize; - unsigned long blocks; + unsigned long long blocks; }; #endif -- cgit v1.2.3 From a0f62ac6362c168754cccb36f196b3dfbddc3bc3 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Sun, 26 Mar 2006 01:37:52 -0800 Subject: [PATCH] 2TB files: add blkcnt_t Add blkcnt_t as the type of inode.i_blocks. This enables you to make the size of blkcnt_t either 4 bytes or 8 bytes on 32 bits architecture with CONFIG_LSF. - CONFIG_LSF Add new configuration parameter. - blkcnt_t On h8300, i386, mips, powerpc, s390 and sh that define sector_t, blkcnt_t is defined as u64 if CONFIG_LSF is enabled; otherwise it is defined as unsigned long. On other architectures, it is defined as unsigned long. - inode.i_blocks Change the type from sector_t to blkcnt_t. Signed-off-by: Takashi Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/types.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 64b0ca4f14e..155d29d5e5e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -490,7 +490,7 @@ struct inode { unsigned int i_blkbits; unsigned long i_blksize; unsigned long i_version; - sector_t i_blocks; + blkcnt_t i_blocks; unsigned short i_bytes; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ struct mutex i_mutex; diff --git a/include/linux/types.h b/include/linux/types.h index 54ae2d59e71..1046c7ad86d 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -137,6 +137,10 @@ typedef __s64 int64_t; typedef unsigned long sector_t; #endif +#ifndef HAVE_BLKCNT_T +typedef unsigned long blkcnt_t; +#endif + /* * The type of an index into the pagecache. Use a #define so asm/types.h * can override it. -- cgit v1.2.3 From e2d53f9525790dfacbcf09f359536311d3913d98 Mon Sep 17 00:00:00 2001 From: Takashi Sato Date: Sun, 26 Mar 2006 01:37:54 -0800 Subject: [PATCH] 2TB files: change type of kstatfs entries This fix was proposed by Trond Myklebust. He says: The type "sector_t" is heavily tied in to the block layer interface as an offset/handle to a block, and is subject to a supposedly block-specific configuration option: CONFIG_LBD. Despite this, it is used in struct kstatfs to save a couple of bytes on the stack whenever we call the filesystems' ->statfs(). So kstatfs's entries related to blocks are invalid on statfs64 for a network filesystem which has more than 2^32-1 blocks when CONFIG_LBD is disabled. - struct kstatfs Change the type of following entries from sector_t to u64. f_blocks f_bfree f_bavail f_files f_ffree Signed-off-by: Trond Myklebust Signed-off-by: Takashi Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/statfs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/statfs.h b/include/linux/statfs.h index ad83a2bdb82..b34cc829f98 100644 --- a/include/linux/statfs.h +++ b/include/linux/statfs.h @@ -8,11 +8,11 @@ struct kstatfs { long f_type; long f_bsize; - sector_t f_blocks; - sector_t f_bfree; - sector_t f_bavail; - sector_t f_files; - sector_t f_ffree; + u64 f_blocks; + u64 f_bfree; + u64 f_bavail; + u64 f_files; + u64 f_ffree; __kernel_fsid_t f_fsid; long f_namelen; long f_frsize; -- cgit v1.2.3 From 89747d369d34e333b9b60f10f333a0b727b4e4e2 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:55 -0800 Subject: [PATCH] ext3_get_blocks: Mapping multiple blocks at a once Currently ext3_get_block() only maps or allocates one block at a time. This is quite inefficient for sequential IO workload. I have posted a early implements a simply multiple block map and allocation with current ext3. The basic idea is allocating the 1st block in the existing way, and attempting to allocate the next adjacent blocks on a best effort basis. More description about the implementation could be found here: http://marc.theaimsgroup.com/?l=ext2-devel&m=112162230003522&w=2 The following the latest version of the patch: break the original patch into 5 patches, re-worked some logicals, and fixed some bugs. The break ups are: [patch 1] Adding map multiple blocks at a time in ext3_get_blocks() [patch 2] Extend ext3_get_blocks() to support multiple block allocation [patch 3] Implement multiple block allocation in ext3-try-to-allocate (called via ext3_new_block()). [patch 4] Proper accounting updates in ext3_new_blocks() [patch 5] Adjust reservation window size properly (by the given number of blocks to allocate) before block allocation to increase the possibility of allocating multiple blocks in a single call. Tests done so far includes fsx,tiobench and dbench. The following numbers collected from Direct IO tests (1G file creation/read) shows the system time have been greatly reduced (more than 50% on my 8 cpu system) with the patches. 1G file DIO write: 2.6.15 2.6.15+patches real 0m31.275s 0m31.161s user 0m0.000s 0m0.000s sys 0m3.384s 0m0.564s 1G file DIO read: 2.6.15 2.6.15+patches real 0m30.733s 0m30.624s user 0m0.000s 0m0.004s sys 0m0.748s 0m0.380s Some previous test we did on buffered IO with using multiple blocks allocation and delayed allocation shows noticeable improvement on throughput and system time. This patch: Add support of mapping multiple blocks in one call. This is useful for DIO reads and re-writes (where blocks are already allocated), also is in line with Christoph's proposal of using getblocks() in mpage_readpage() or mpage_readpages(). Signed-off-by: Mingming Cao Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index e7239f2f97a..0adadd85fa6 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -775,9 +775,9 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned); int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -int ext3_get_block_handle(handle_t *handle, struct inode *inode, - sector_t iblock, struct buffer_head *bh_result, int create, - int extend_disksize); +int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, + int create, int extend_disksize); extern void ext3_read_inode (struct inode *); extern int ext3_write_inode (struct inode *, int); -- cgit v1.2.3 From b54e41ec17ae91dce174eb5a3515e7af4a440d42 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 26 Mar 2006 01:37:57 -0800 Subject: [PATCH] ext3_get_blocks: support multiple blocks allocation in ext3_new_block() Change ext3_try_to_allocate() (called via ext3_new_blocks()) to try to allocate the requested number of blocks on a best effort basis: After allocated the first block, it will always attempt to allocate the next few(up to the requested size and not beyond the reservation window) adjacent blocks at the same time. Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 0adadd85fa6..8bb4f842cde 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -36,7 +36,8 @@ struct statfs; * Define EXT3_RESERVATION to reserve data blocks for expanding files */ #define EXT3_DEFAULT_RESERVE_BLOCKS 8 -#define EXT3_MAX_RESERVE_BLOCKS 1024 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT3_MAX_RESERVE_BLOCKS 1027 #define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0 /* * Always enable hashed directories @@ -732,6 +733,8 @@ struct dir_private_info { extern int ext3_bg_has_super(struct super_block *sb, int group); extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); +extern int ext3_new_blocks (handle_t *, struct inode *, unsigned long, + unsigned long *, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, unsigned long); extern void ext3_free_blocks_sb (handle_t *, struct super_block *, -- cgit v1.2.3 From 205f87f6b342444f722e4559d33318686f7df2ca Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:00 -0800 Subject: [PATCH] change buffer_head.b_size to size_t Increase the size of the buffer_head b_size field (only) for 64 bit platforms. Update some old and moldy comments in and around the structure as well. The b_size increase allows us to perform larger mappings and allocations for large I/O requests from userspace, which tie in with other changes allowing the get_block_t() interface to map multiple blocks at once. Signed-off-by: Nathan Scott Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index da917ed096a..464f068f8b1 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -46,25 +46,28 @@ struct address_space; typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* - * Keep related fields in common cachelines. The most commonly accessed - * field (b_state) goes at the start so the compiler does not generate - * indexed addressing for it. + * Historically, a buffer_head was used to map a single block + * within a page, and of course as the unit of I/O through the + * filesystem and block layers. Nowadays the basic I/O unit + * is the bio, and buffer_heads are used for extracting block + * mappings (via a get_block_t call), for tracking state within + * a page (via a page_mapping) and for wrapping bio submission + * for backward compatibility reasons (e.g. submit_bh). */ struct buffer_head { - /* First cache line: */ unsigned long b_state; /* buffer state bitmap (see above) */ struct buffer_head *b_this_page;/* circular list of page's buffers */ struct page *b_page; /* the page this bh is mapped to */ - atomic_t b_count; /* users using this block */ - u32 b_size; /* block size */ - sector_t b_blocknr; /* block number */ - char *b_data; /* pointer to data block */ + sector_t b_blocknr; /* start block number */ + size_t b_size; /* size of mapping */ + char *b_data; /* pointer to data within the page */ struct block_device *b_bdev; bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ + atomic_t b_count; /* users using this buffer_head */ }; /* -- cgit v1.2.3 From b0cf2321c6599138f860517745503691556d8453 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:00 -0800 Subject: [PATCH] pass b_size to ->get_block() Pass amount of disk needs to be mapped to get_block(). This way one can modify the fs ->get_block() functions to map multiple blocks at the same time. [akpm@osdl.org: performance tweak] [akpm@osdl.org: remove unneeded assignments] Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 464f068f8b1..fb7e9b7ccbe 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -280,6 +280,7 @@ map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) set_buffer_mapped(bh); bh->b_bdev = sb->s_bdev; bh->b_blocknr = block; + bh->b_size = sb->s_blocksize; } /* -- cgit v1.2.3 From 1d8fa7a2b9a39d18727acc5c468e870df606c852 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 26 Mar 2006 01:38:02 -0800 Subject: [PATCH] remove ->get_blocks() support Now that get_block() can handle mapping multiple disk blocks, no need to have ->get_blocks(). This patch removes fs specific ->get_blocks() added for DIO and makes it users use get_block() instead. Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 155d29d5e5e..9d967494695 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -252,9 +252,6 @@ extern void __init files_init(unsigned long); struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); -typedef int (get_blocks_t)(struct inode *inode, sector_t iblock, - unsigned long max_blocks, - struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); @@ -1645,7 +1642,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos, ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, int lock_type); enum { @@ -1656,29 +1653,29 @@ enum { static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_blocks, end_io, DIO_LOCKING); + nr_segs, get_block, end_io, DIO_LOCKING); } static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_blocks, end_io, DIO_NO_LOCKING); + nr_segs, get_block, end_io, DIO_NO_LOCKING); } static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, + loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_blocks, end_io, DIO_OWN_LOCKING); + nr_segs, get_block, end_io, DIO_OWN_LOCKING); } extern struct file_operations generic_ro_fops; -- cgit v1.2.3 From 92127c7a45d4d167d9b015a5f9de6b41ed66f1d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Mar 2006 01:38:05 -0800 Subject: [PATCH] hrtimers: optimize softirq runqueues The hrtimer softirq is called from the timer softirq every tick. Retrieve the current time from xtime and wall_to_monotonic instead of calling base->get_time() for each timer base. Store the time in the base structure and provide a hook once clock source abstractions are in place and to keep the code open for new base clocks. Based on a patch from: Roman Zippel Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6401c31d6ad..64e2754ca73 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -72,14 +72,16 @@ struct hrtimer { /** * struct hrtimer_base - the timer base for a specific clock * - * @index: clock type index for per_cpu support when moving a timer - * to a base on another cpu. - * @lock: lock protecting the base and associated timers - * @active: red black tree root node for the active timers - * @first: pointer to the timer node which expires first - * @resolution: the resolution of the clock, in nanoseconds - * @get_time: function to retrieve the current time of the clock - * @curr_timer: the timer which is executing a callback right now + * @index: clock type index for per_cpu support when moving a timer + * to a base on another cpu. + * @lock: lock protecting the base and associated timers + * @active: red black tree root node for the active timers + * @first: pointer to the timer node which expires first + * @resolution: the resolution of the clock, in nanoseconds + * @get_time: function to retrieve the current time of the clock + * @get_sofirq_time: function to retrieve the current time from the softirq + * @curr_timer: the timer which is executing a callback right now + * @softirq_time: the time when running the hrtimer queue in the softirq */ struct hrtimer_base { clockid_t index; @@ -88,7 +90,9 @@ struct hrtimer_base { struct rb_node *first; ktime_t resolution; ktime_t (*get_time)(void); + ktime_t (*get_softirq_time)(void); struct hrtimer *curr_timer; + ktime_t softirq_time; }; /* -- cgit v1.2.3 From 44f21475511bbc0135b52c66ad74dcc6a9026da3 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:06 -0800 Subject: [PATCH] hrtimers: pass current time to hrtimer_forward() Pass current time to hrtimer_forward(). This allows to use the softirq time in the timer base when the forward function is called from the timer callback. Other places pass current time with a call to timer->base->get_time(). Signed-off-by: Roman Zippel Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 64e2754ca73..84fc186324e 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -130,7 +130,8 @@ static inline int hrtimer_active(const struct hrtimer *timer) } /* Forward a hrtimer so it expires after now: */ -extern unsigned long hrtimer_forward(struct hrtimer *timer, ktime_t interval); +extern unsigned long +hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /* Precise sleep: */ extern long hrtimer_nanosleep(struct timespec *rqtp, -- cgit v1.2.3 From 432569bb9d9d424d7ffe5b21f8205c55bdd1aaa8 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:08 -0800 Subject: [PATCH] hrtimers: simplify nanosleep nanosleep is the only user of the expired state, so let it manage this itself, which makes the hrtimer code a bit simpler. The remaining time is also only calculated if requested. Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 84fc186324e..0e8f4762f6f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -38,9 +38,7 @@ enum hrtimer_restart { * Timer states: */ enum hrtimer_state { - HRTIMER_INACTIVE, /* Timer is inactive */ - HRTIMER_EXPIRED, /* Timer is expired */ - HRTIMER_RUNNING, /* Timer is running the callback function */ + HRTIMER_INACTIVE, /* Timer is inactive */ HRTIMER_PENDING, /* Timer is pending */ }; -- cgit v1.2.3 From b75f7a51ca75c977d7d77f735d7a7859194eb39e Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:09 -0800 Subject: [PATCH] hrtimers: remove state field Remove the state field and encode this information in the rb_node similiar to normal timer. Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0e8f4762f6f..f57cc7bd700 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -34,13 +34,7 @@ enum hrtimer_restart { HRTIMER_RESTART, }; -/* - * Timer states: - */ -enum hrtimer_state { - HRTIMER_INACTIVE, /* Timer is inactive */ - HRTIMER_PENDING, /* Timer is pending */ -}; +#define HRTIMER_INACTIVE ((void *)1UL) struct hrtimer_base; @@ -61,7 +55,6 @@ struct hrtimer_base; struct hrtimer { struct rb_node node; ktime_t expires; - enum hrtimer_state state; int (*function)(void *); void *data; struct hrtimer_base *base; @@ -124,7 +117,7 @@ extern ktime_t hrtimer_get_next_event(void); static inline int hrtimer_active(const struct hrtimer *timer) { - return timer->state == HRTIMER_PENDING; + return timer->node.rb_parent != HRTIMER_INACTIVE; } /* Forward a hrtimer so it expires after now: */ -- cgit v1.2.3 From 272705c5979c114e63dbfcd28ea15093038a4c42 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:10 -0800 Subject: [PATCH] hrtimers: remove DEFINE_KTIME and ktime_to_clock_t() Now that it_real_value is gone, the last user of DEFINE_KTIME and ktime_to_clock_t are also gone, so remove it before someone starts using it again. Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index f3dec45ef87..62bc5758070 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -64,9 +64,6 @@ typedef union { #if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) -/* Define a ktime_t variable and initialize it to zero: */ -#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 } - /** * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value * @@ -113,9 +110,6 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) /* Map the ktime_t to timeval conversion to ns_to_timeval function */ #define ktime_to_timeval(kt) ns_to_timeval((kt).tv64) -/* Map the ktime_t to clock_t conversion to the inline in jiffies.h: */ -#define ktime_to_clock_t(kt) nsec_to_clock_t((kt).tv64) - /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ #define ktime_to_ns(kt) ((kt).tv64) @@ -136,9 +130,6 @@ static inline ktime_t timeval_to_ktime(struct timeval tv) * tv.sec < 0 and 0 >= tv.nsec < NSEC_PER_SEC */ -/* Define a ktime_t variable and initialize it to zero: */ -#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 } - /* Set a ktime_t variable to a value in sec/nsec representation: */ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) { @@ -254,17 +245,6 @@ static inline struct timeval ktime_to_timeval(const ktime_t kt) .tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC) }; } -/** - * ktime_to_clock_t - convert a ktime_t variable to clock_t format - * @kt: the ktime_t variable to convert - * - * Returns a clock_t variable with the converted value - */ -static inline clock_t ktime_to_clock_t(const ktime_t kt) -{ - return nsec_to_clock_t( (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec); -} - /** * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds * @kt: the ktime_t variable to convert -- cgit v1.2.3 From df869b630d9d9131c10cf073fb61646048874b2f Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:11 -0800 Subject: [PATCH] hrtimers: remove nsec_t typedef nsec_t predates ktime_t and has mostly been superseded by it. In the few places that are left it's better to make it explicit that we're dealing with 64 bit values here. Signed-off-by: Roman Zippel Acked-by: Thomas Gleixner Acked-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index bf0e785e2e0..0cd696cee99 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -73,12 +73,6 @@ extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); #define timespec_valid(ts) \ (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) -/* - * 64-bit nanosec type. Large enough to span 292+ years in nanosecond - * resolution. Ought to be enough for a while. - */ -typedef s64 nsec_t; - extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; @@ -114,9 +108,9 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); * Returns the scalar nanosecond representation of the timespec * parameter. */ -static inline nsec_t timespec_to_ns(const struct timespec *ts) +static inline s64 timespec_to_ns(const struct timespec *ts) { - return ((nsec_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } /** @@ -126,9 +120,9 @@ static inline nsec_t timespec_to_ns(const struct timespec *ts) * Returns the scalar nanosecond representation of the timeval * parameter. */ -static inline nsec_t timeval_to_ns(const struct timeval *tv) +static inline s64 timeval_to_ns(const struct timeval *tv) { - return ((nsec_t) tv->tv_sec * NSEC_PER_SEC) + + return ((s64) tv->tv_sec * NSEC_PER_SEC) + tv->tv_usec * NSEC_PER_USEC; } @@ -138,7 +132,7 @@ static inline nsec_t timeval_to_ns(const struct timeval *tv) * * Returns the timespec representation of the nsec parameter. */ -extern struct timespec ns_to_timespec(const nsec_t nsec); +extern struct timespec ns_to_timespec(const s64 nsec); /** * ns_to_timeval - Convert nanoseconds to timeval @@ -146,7 +140,7 @@ extern struct timespec ns_to_timespec(const nsec_t nsec); * * Returns the timeval representation of the nsec parameter. */ -extern struct timeval ns_to_timeval(const nsec_t nsec); +extern struct timeval ns_to_timeval(const s64 nsec); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 05cfb614ddbf3181540ce09d44d96486f8ba8d6a Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:12 -0800 Subject: [PATCH] hrtimers: remove data field The nanosleep cleanup allows to remove the data field of hrtimer. The callback function can use container_of() to get it's own data. Since the hrtimer structure is anyway embedded in other structures, this adds no overhead. Signed-off-by: Roman Zippel Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 5 +---- include/linux/sched.h | 1 + include/linux/timer.h | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f57cc7bd700..93830158348 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -45,9 +45,7 @@ struct hrtimer_base; * @expires: the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. - * @state: state of the timer * @function: timer expiry callback function - * @data: argument for the callback function * @base: pointer to the timer base (per cpu and per clock) * * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE() @@ -55,8 +53,7 @@ struct hrtimer_base; struct hrtimer { struct rb_node node; ktime_t expires; - int (*function)(void *); - void *data; + int (*function)(struct hrtimer *); struct hrtimer_base *base; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index e0054c1b9a0..036d14d2bf9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -402,6 +402,7 @@ struct signal_struct { /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; + struct task_struct *tsk; ktime_t it_real_incr; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ diff --git a/include/linux/timer.h b/include/linux/timer.h index ee5a09e806e..b5caabca553 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -96,6 +96,7 @@ static inline void add_timer(struct timer_list *timer) extern void init_timers(void); extern void run_local_timers(void); -extern int it_real_fn(void *); +struct hrtimer; +extern int it_real_fn(struct hrtimer *); #endif -- cgit v1.2.3 From ee8a4b7f857fe7ba243e65c8925798cf8eda5ab0 Mon Sep 17 00:00:00 2001 From: Hansjoerg Lipp Date: Sun, 26 Mar 2006 01:38:32 -0800 Subject: [PATCH] isdn4linux: Siemens Gigaset drivers - tty interface And: Tilman Schmidt This patch adds the tty interface to the gigaset module. The tty interface provides direct access to the AT command set of the Gigaset devices. Signed-off-by: Hansjoerg Lipp Signed-off-by: Tilman Schmidt Cc: Karsten Keil Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gigaset_dev.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/gigaset_dev.h (limited to 'include/linux') diff --git a/include/linux/gigaset_dev.h b/include/linux/gigaset_dev.h new file mode 100644 index 00000000000..70ad09c8ad1 --- /dev/null +++ b/include/linux/gigaset_dev.h @@ -0,0 +1,32 @@ +/* + * interface to user space for the gigaset driver + * + * Copyright (c) 2004 by Hansjoerg Lipp + * + * ===================================================================== + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * ===================================================================== + * Version: $Id: gigaset_dev.h,v 1.4.4.4 2005/11/21 22:28:09 hjlipp Exp $ + * ===================================================================== + */ + +#ifndef GIGASET_INTERFACE_H +#define GIGASET_INTERFACE_H + +#include + +#define GIGASET_IOCTL 0x47 + +#define GIGVER_DRIVER 0 +#define GIGVER_COMPAT 1 +#define GIGVER_FWBASE 2 + +#define GIGASET_REDIR _IOWR (GIGASET_IOCTL, 0, int) +#define GIGASET_CONFIG _IOWR (GIGASET_IOCTL, 1, int) +#define GIGASET_BRKCHARS _IOW (GIGASET_IOCTL, 2, unsigned char[6]) //FIXME [6] okay? +#define GIGASET_VERSION _IOWR (GIGASET_IOCTL, 3, unsigned[4]) + +#endif -- cgit v1.2.3 From 0b28002fdf2d5b6ce3135a544c04940a16c5b0ba Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 26 Mar 2006 01:38:58 -0800 Subject: [PATCH] more s/fucn/func/ typo fixes s/fucntion/function/ typo fixes Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gameport.h | 4 ++-- include/linux/serio.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 2401dea2b86..9c8e6da2393 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -119,7 +119,7 @@ static inline void gameport_set_name(struct gameport *gameport, const char *name } /* - * Use the following fucntions to manipulate gameport's per-port + * Use the following functions to manipulate gameport's per-port * driver-specific data. */ static inline void *gameport_get_drvdata(struct gameport *gameport) @@ -133,7 +133,7 @@ static inline void gameport_set_drvdata(struct gameport *gameport, void *data) } /* - * Use the following fucntions to pin gameport's driver in process context + * Use the following functions to pin gameport's driver in process context */ static inline int gameport_pin_driver(struct gameport *gameport) { diff --git a/include/linux/serio.h b/include/linux/serio.h index aa4d6493a03..690aabca8ed 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -119,7 +119,7 @@ static inline void serio_cleanup(struct serio *serio) } /* - * Use the following fucntions to manipulate serio's per-port + * Use the following functions to manipulate serio's per-port * driver-specific data. */ static inline void *serio_get_drvdata(struct serio *serio) @@ -133,7 +133,7 @@ static inline void serio_set_drvdata(struct serio *serio, void *data) } /* - * Use the following fucntions to protect critical sections in + * Use the following functions to protect critical sections in * driver code from port's interrupt handler */ static inline void serio_pause_rx(struct serio *serio) @@ -147,7 +147,7 @@ static inline void serio_continue_rx(struct serio *serio) } /* - * Use the following fucntions to pin serio's driver in process context + * Use the following functions to pin serio's driver in process context */ static inline int serio_pin_driver(struct serio *serio) { -- cgit v1.2.3 From e9bebd6f3acee68fa07d44726895b40733cb1dc0 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 26 Mar 2006 01:39:55 -0800 Subject: [PATCH] bitops: remove unused generic bitops in include/linux/bitops.h generic_{ffs,fls,fls64,hweight{64,32,16,8}}() were moved into include/asm-generic/bitops.h. So all architectures don't use them. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 124 +------------------------------------------------ 1 file changed, 1 insertion(+), 123 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index f17525a963d..5d1eabcde5d 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -2,89 +2,12 @@ #define _LINUX_BITOPS_H #include -/* - * ffs: find first bit set. This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ - -static inline int generic_ffs(int x) -{ - int r = 1; - - if (!x) - return 0; - if (!(x & 0xffff)) { - x >>= 16; - r += 16; - } - if (!(x & 0xff)) { - x >>= 8; - r += 8; - } - if (!(x & 0xf)) { - x >>= 4; - r += 4; - } - if (!(x & 3)) { - x >>= 2; - r += 2; - } - if (!(x & 1)) { - x >>= 1; - r += 1; - } - return r; -} - -/* - * fls: find last bit set. - */ - -static __inline__ int generic_fls(int x) -{ - int r = 32; - - if (!x) - return 0; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; -} - /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include - -static inline int generic_fls64(__u64 x) -{ - __u32 h = x >> 32; - if (h) - return fls(h) + 32; - return fls(x); -} - static __inline__ int get_bitmask_order(unsigned int count) { int order; @@ -103,54 +26,9 @@ static __inline__ int get_count_order(unsigned int count) return order; } -/* - * hweightN: returns the hamming weight (i.e. the number - * of bits set) of a N-bit word - */ - -static inline unsigned int generic_hweight32(unsigned int w) -{ - unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555); - res = (res & 0x33333333) + ((res >> 2) & 0x33333333); - res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); - res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); - return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); -} - -static inline unsigned int generic_hweight16(unsigned int w) -{ - unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555); - res = (res & 0x3333) + ((res >> 2) & 0x3333); - res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F); - return (res & 0x00FF) + ((res >> 8) & 0x00FF); -} - -static inline unsigned int generic_hweight8(unsigned int w) -{ - unsigned int res = (w & 0x55) + ((w >> 1) & 0x55); - res = (res & 0x33) + ((res >> 2) & 0x33); - return (res & 0x0F) + ((res >> 4) & 0x0F); -} - -static inline unsigned long generic_hweight64(__u64 w) -{ -#if BITS_PER_LONG < 64 - return generic_hweight32((unsigned int)(w >> 32)) + - generic_hweight32((unsigned int)w); -#else - u64 res; - res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul); - res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); - res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful); - res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul); - res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul); - return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul); -#endif -} - static inline unsigned long hweight_long(unsigned long w) { - return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w); + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } /* -- cgit v1.2.3 From fbb18a277a6f192404aa20ece49529acb1e1e76d Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 26 Mar 2006 23:13:39 +0100 Subject: [SERIAL] amba-pl010: allow platforms to specify modem control method The amba-pl010 hardware does not provide RTS and DTR control lines; it is expected that these will be implemented using GPIO. Allow platforms to supply a function to implement manipulation of modem control lines. Signed-off-by: Russell King --- include/linux/amba/serial.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index dc726ffcceb..48ee32a18ac 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -158,4 +158,10 @@ #define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) +#ifndef __ASSEMBLY__ +struct amba_pl010_data { + void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); +}; +#endif + #endif -- cgit v1.2.3 From 5931c4350059ce9bd5fe398b628c478753a11e44 Mon Sep 17 00:00:00 2001 From: Sylvain Munaut Date: Sun, 26 Mar 2006 13:37:07 +0200 Subject: [PATCH] ppc32: Adds support for the PCI hostbridge in MPC5200B ppc32: Adds support for the PCI hostbridge in MPC5200B Signed-off-by: John Rigby Signed-off-by: Sylvain Munaut Signed-off-by: Paul Mackerras --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6f080ae5928..72d1b678e0e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -772,6 +772,7 @@ #define PCI_DEVICE_ID_MOTOROLA_HAWK 0x4803 #define PCI_DEVICE_ID_MOTOROLA_HARRIER 0x480b #define PCI_DEVICE_ID_MOTOROLA_MPC5200 0x5803 +#define PCI_DEVICE_ID_MOTOROLA_MPC5200B 0x5809 #define PCI_VENDOR_ID_PROMISE 0x105a #define PCI_DEVICE_ID_PROMISE_20265 0x0d30 -- cgit v1.2.3 From 837c7878771c15ed8d85ecf814ece7fcb4551b46 Mon Sep 17 00:00:00 2001 From: Ben Woodard Date: Wed, 22 Mar 2006 08:09:31 +0100 Subject: [BLOCK] increase size of disk stat counters The kernel's representation of the disk statistics uses the type unsigned which is 32b on both 32b and 64b platforms. Unfortunately, most system tools that work with these numbers that are exported in /proc/diskstats including iostat read these numbers into unsigned longs. This works fine on 32b platforms and when the number of IO transactions are small on 64b platforms. However, when the numbers wrap on 64b platforms & you read the numbers into unsigned longs, and compare the numbers to previous readings, then you get an unsigned representation of a negative number. This looks like a very large 64b number & gives you bizarre readouts in iostat: ilc4: Device: rrqm/s wrqm/s r/s w/s rsec/s wsec/s rkB/s wkB/s avgrq-sz avgqu-sz await svctm %util ilc4: sda 5.50 0.00 143.96 0.00 307496983987862656.00 0.00 153748491993931328.00 0.00 2136028725038430.00 7.94 55.12 5.59 80.42 Though fixing iostat in user space is possible, and a quick survey indicates that several other similar tools also use unsigned longs when processing /proc/diskstats. Therefore, it seems like a better approach would be to extend the length of the disk_stats structure on 64b architectures to 64b. The following patch does that. It should not affect the operation on 32b platforms. Signed-off-by: Ben Woodard Cc: Rick Lindsley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/genhd.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd647fde5ec..179fea53fc8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -89,12 +89,12 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 struct disk_stats { - unsigned sectors[2]; /* READs and WRITEs */ - unsigned ios[2]; - unsigned merges[2]; - unsigned ticks[2]; - unsigned io_ticks; - unsigned time_in_queue; + unsigned long sectors[2]; /* READs and WRITEs */ + unsigned long ios[2]; + unsigned long merges[2]; + unsigned long ticks[2]; + unsigned long io_ticks; + unsigned long time_in_queue; }; struct gendisk { -- cgit v1.2.3 From f75ba3ade8a4599d67040a9493d75a864e7b329c Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:52 -0800 Subject: [PATCH] autofs4: increase module version Update autofs4 version. Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index 9343c89d843..d998ddcf728 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -23,7 +23,7 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 4 -#define AUTOFS_PROTO_SUBVERSION 7 +#define AUTOFS_PROTO_SUBVERSION 10 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 -- cgit v1.2.3 From 5c0a32fc2cd0be912511199449a37a4a6f0f582d Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 27 Mar 2006 01:14:55 -0800 Subject: [PATCH] autofs4: add new packet type for v5 communications This patch define a new autofs packet for autofs v5 and updates the waitq.c functions to handle the additional packet type. Signed-off-by: Ian Kent Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/auto_fs4.h | 51 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h index d998ddcf728..0a6bc52ffe8 100644 --- a/include/linux/auto_fs4.h +++ b/include/linux/auto_fs4.h @@ -19,18 +19,37 @@ #undef AUTOFS_MIN_PROTO_VERSION #undef AUTOFS_MAX_PROTO_VERSION -#define AUTOFS_PROTO_VERSION 4 +#define AUTOFS_PROTO_VERSION 5 #define AUTOFS_MIN_PROTO_VERSION 3 -#define AUTOFS_MAX_PROTO_VERSION 4 +#define AUTOFS_MAX_PROTO_VERSION 5 -#define AUTOFS_PROTO_SUBVERSION 10 +#define AUTOFS_PROTO_SUBVERSION 0 /* Mask for expire behaviour */ #define AUTOFS_EXP_IMMEDIATE 1 #define AUTOFS_EXP_LEAVES 2 -/* New message type */ -#define autofs_ptype_expire_multi 2 /* Expire entry (umount request) */ +/* Daemon notification packet types */ +enum autofs_notify { + NFY_NONE, + NFY_MOUNT, + NFY_EXPIRE +}; + +/* Kernel protocol version 4 packet types */ + +/* Expire entry (umount request) */ +#define autofs_ptype_expire_multi 2 + +/* Kernel protocol version 5 packet types */ + +/* Indirect mount missing and expire requests. */ +#define autofs_ptype_missing_indirect 3 +#define autofs_ptype_expire_indirect 4 + +/* Direct mount missing and expire requests */ +#define autofs_ptype_missing_direct 5 +#define autofs_ptype_expire_direct 6 /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { @@ -40,14 +59,36 @@ struct autofs_packet_expire_multi { char name[NAME_MAX+1]; }; +/* autofs v5 common packet struct */ +struct autofs_v5_packet { + struct autofs_packet_hdr hdr; + autofs_wqt_t wait_queue_token; + __u32 dev; + __u64 ino; + __u32 uid; + __u32 gid; + __u32 pid; + __u32 tgid; + __u32 len; + char name[NAME_MAX+1]; +}; + +typedef struct autofs_v5_packet autofs_packet_missing_indirect_t; +typedef struct autofs_v5_packet autofs_packet_expire_indirect_t; +typedef struct autofs_v5_packet autofs_packet_missing_direct_t; +typedef struct autofs_v5_packet autofs_packet_expire_direct_t; + union autofs_packet_union { struct autofs_packet_hdr hdr; struct autofs_packet_missing missing; struct autofs_packet_expire expire; struct autofs_packet_expire_multi expire_multi; + struct autofs_v5_packet v5_packet; }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI +#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) #define AUTOFS_IOC_ASKREGHOST _IOR(0x93,0x68,int) #define AUTOFS_IOC_TOGGLEREGHOST _IOR(0x93,0x69,int) -- cgit v1.2.3 From efc36aa5608f5717338747e152c23f2cfdb14697 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:14:59 -0800 Subject: [PATCH] knfsd: Change the store of auth_domains to not be a 'cache' The 'auth_domain's are simply handles on internal data structures. They do not cache information from user-space, and forcing them into the mold of a 'cache' misrepresents their true nature and causes confusion. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svcauth.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index c119ce7cbd2..2fe2087edd6 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -45,9 +45,10 @@ struct svc_rqst; /* forward decl */ * of ip addresses to the given client. */ struct auth_domain { - struct cache_head h; + struct kref ref; + struct hlist_node hash; char *name; - int flavour; + struct auth_ops *flavour; }; /* @@ -86,6 +87,9 @@ struct auth_domain { * * domain_release() * This call releases a domain. + * set_client() + * Givens a pending request (struct svc_rqst), finds and assigns + * an appropriate 'auth_domain' as the client. */ struct auth_ops { char * name; @@ -117,7 +121,7 @@ extern void svc_auth_unregister(rpc_authflavor_t flavor); extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom); -extern struct auth_domain *auth_domain_lookup(struct auth_domain *item, int set); +extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); extern struct auth_domain *auth_unix_lookup(struct in_addr addr); extern int auth_unix_forget_old(struct auth_domain *dom); @@ -160,8 +164,6 @@ static inline unsigned long hash_mem(char *buf, int length, int bits) return hash >> (BITS_PER_LONG - bits); } -extern struct cache_detail auth_domain_cache, ip_map_cache; - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ -- cgit v1.2.3 From eab7e2e647c348b418e8715ecaca0177e1b473c7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:00 -0800 Subject: [PATCH] knfsd: Break the hard linkage from svc_expkey to svc_export Current svc_expkey holds a pointer to the svc_export structure, so updates to that structure have to be in-place, which is a wart on the whole cache infrastruct. So we break that linkage and just do a second lookup. If this became a performance issue, it would be possible to put a direct link back in which was only used conditionally. i.e. when an object is replaced in the cache, we set a flag in the old object. When dereferencing the link from svc_expkey, if the flag is set, we drop the reference and do a fresh lookup. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 6bad4766d3d..d52e0b7ad37 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -67,7 +67,8 @@ struct svc_expkey { int ek_fsidtype; u32 ek_fsid[3]; - struct svc_export * ek_export; + struct vfsmount * ek_mnt; + struct dentry * ek_dentry; }; #define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT)) @@ -114,22 +115,9 @@ static inline void exp_get(struct svc_export *exp) { cache_get(&exp->h); } -static inline struct svc_export * +extern struct svc_export * exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, - struct cache_req *reqp) -{ - struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); - if (ek && !IS_ERR(ek)) { - struct svc_export *exp = ek->ek_export; - int err; - exp_get(exp); - expkey_put(&ek->h, &svc_expkey_cache); - if ((err = cache_check(&svc_export_cache, &exp->h, reqp))) - exp = ERR_PTR(err); - return exp; - } else - return ERR_PTR(PTR_ERR(ek)); -} + struct cache_req *reqp); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 7d317f2c9f1e9dcf4f632fa98f91d1d4a36c4cae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:01 -0800 Subject: [PATCH] knfsd: Get rid of 'inplace' sunrpc caches These were an unnecessary wart. Also only have one 'DefineSimpleCache..' instead of two. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c4e3ea7cf15..405ac14e509 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -133,14 +133,11 @@ struct cache_deferred_req { * If "set" == 0 : * If an entry is found, it is returned * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 and INPLACE == 0 : + * If "set" == 1 : * If no entry is found a new one is inserted with data from "template" * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE * If a CACHE_VALID entry is found, a new entry is swapped in with data * from "template" - * If set == 1, and INPLACE == 1 : - * As above, except that if a CACHE_VALID entry is found, we UPDATE in place - * instead of swapping in a new entry. * * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not * run but insteead CACHE_NEGATIVE is set in any new item. @@ -159,13 +156,8 @@ struct cache_deferred_req { * TEST tests if "tmp" matches "item" * INIT copies key information from "item" to "new" * UPDATE copies content information from "item" to "tmp" - * INPLACE is true if updates can happen inplace rather than allocating a new structure - * - * WARNING: any substantial changes to this must be reflected in - * net/sunrpc/svcauth.c(auth_domain_lookup) - * which is a similar routine that is open-coded. */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE,INPLACE) \ +#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ RTN *FNAME ARGS \ { \ RTN *tmp, *new=NULL; \ @@ -179,13 +171,13 @@ RTN *FNAME ARGS \ tmp = container_of(*hp, RTN, MEMBER); \ if (TEST) { /* found a match */ \ \ - if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ break; \ \ if (new) \ {INIT;} \ if (set) { \ - if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ + if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ { /* need to swap in new */ \ RTN *t2; \ \ @@ -206,7 +198,7 @@ RTN *FNAME ARGS \ else read_unlock(&(DETAIL)->hash_lock); \ if (set) \ cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && !INPLACE && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ + if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ return tmp; \ } \ @@ -239,10 +231,12 @@ RTN *FNAME ARGS \ return NULL; \ } -#define DefineSimpleCacheLookup(STRUCT,INPLACE) \ - DefineCacheLookup(struct STRUCT, h, STRUCT##_lookup, (struct STRUCT *item, int set), /*no setup */, \ - & STRUCT##_cache, STRUCT##_hash(item), STRUCT##_match(item, tmp),\ - STRUCT##_init(new, item), STRUCT##_update(tmp, item),INPLACE) +#define DefineSimpleCacheLookup(STRUCT, FUNC) \ + DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ + (struct STRUCT *item, int set), /*no setup */, \ + & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ + STRUCT##_init(new, item), STRUCT##_update(tmp, item)) + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ -- cgit v1.2.3 From 15a5f6bd23eddd5b3be80366f364be04fb1c1c99 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:02 -0800 Subject: [PATCH] knfsd: Create cache_lookup function instead of using a macro to declare one The C++-like 'template' approach proves to be too ugly and hard to work with. The old 'template' won't go away until all users are updated. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 405ac14e509..3e17a5ff1de 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -81,6 +81,11 @@ struct cache_detail { struct cache_detail *cd, struct cache_head *h); + struct cache_head * (*alloc)(void); + int (*match)(struct cache_head *orig, struct cache_head *new); + void (*init)(struct cache_head *orig, struct cache_head *new); + void (*update)(struct cache_head *orig, struct cache_head *new); + /* fields below this comment are for internal use * and should not be touched by cache owners */ @@ -237,6 +242,13 @@ RTN *FNAME ARGS \ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ STRUCT##_init(new, item), STRUCT##_update(tmp, item)) +extern struct cache_head * +sunrpc_cache_lookup(struct cache_detail *detail, + struct cache_head *key, int hash); +extern struct cache_head * +sunrpc_cache_update(struct cache_detail *detail, + struct cache_head *new, struct cache_head *old, int hash); + #define cache_for_each(pos, detail, index, member) \ for (({read_lock(&(detail)->hash_lock); index = (detail)->hash_size;}) ; \ -- cgit v1.2.3 From 4d90452cb23b08a9a9dd001010f0ee6b1ee83a45 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:07 -0800 Subject: [PATCH] knfsd: Remove DefineCacheLookup This has been replaced by more traditional code. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 113 ------------------------------------------- 1 file changed, 113 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 3e17a5ff1de..afc481dd02d 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -128,119 +128,6 @@ struct cache_deferred_req { int too_many); }; -/* - * just like a template in C++, this macro does cache lookup - * for us. - * The function is passed some sort of HANDLE from which a cache_detail - * structure can be determined (via SETUP, DETAIL), a template - * cache entry (type RTN*), and a "set" flag. Using the HASHFN and the - * TEST, the function will try to find a matching cache entry in the cache. - * If "set" == 0 : - * If an entry is found, it is returned - * If no entry is found, a new non-VALID entry is created. - * If "set" == 1 : - * If no entry is found a new one is inserted with data from "template" - * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE - * If a CACHE_VALID entry is found, a new entry is swapped in with data - * from "template" - * - * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not - * run but insteead CACHE_NEGATIVE is set in any new item. - - * In any case, the new entry is returned with a reference count. - * - * - * RTN is a struct type for a cache entry - * MEMBER is the member of the cache which is cache_head, which must be first - * FNAME is the name for the function - * ARGS are arguments to function and must contain RTN *item, int set. May - * also contain something to be usedby SETUP or DETAIL to find cache_detail. - * SETUP locates the cache detail and makes it available as... - * DETAIL identifies the cache detail, possibly set up by SETUP - * HASHFN returns a hash value of the cache entry "item" - * TEST tests if "tmp" matches "item" - * INIT copies key information from "item" to "new" - * UPDATE copies content information from "item" to "tmp" - */ -#define DefineCacheLookup(RTN,MEMBER,FNAME,ARGS,SETUP,DETAIL,HASHFN,TEST,INIT,UPDATE) \ -RTN *FNAME ARGS \ -{ \ - RTN *tmp, *new=NULL; \ - struct cache_head **hp, **head; \ - SETUP; \ - head = &(DETAIL)->hash_table[HASHFN]; \ - retry: \ - if (set||new) write_lock(&(DETAIL)->hash_lock); \ - else read_lock(&(DETAIL)->hash_lock); \ - for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) { \ - tmp = container_of(*hp, RTN, MEMBER); \ - if (TEST) { /* found a match */ \ - \ - if (set && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ - break; \ - \ - if (new) \ - {INIT;} \ - if (set) { \ - if (test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ - { /* need to swap in new */ \ - RTN *t2; \ - \ - new->MEMBER.next = tmp->MEMBER.next; \ - *hp = &new->MEMBER; \ - tmp->MEMBER.next = NULL; \ - t2 = tmp; tmp = new; new = t2; \ - } \ - if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags)) \ - set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - else { \ - UPDATE; \ - clear_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - } \ - } \ - cache_get(&tmp->MEMBER); \ - if (set||new) write_unlock(&(DETAIL)->hash_lock); \ - else read_unlock(&(DETAIL)->hash_lock); \ - if (set) \ - cache_fresh(DETAIL, &tmp->MEMBER, item->MEMBER.expiry_time); \ - if (set && new) cache_fresh(DETAIL, &new->MEMBER, 0); \ - if (new) (DETAIL)->cache_put(&new->MEMBER, DETAIL); \ - return tmp; \ - } \ - } \ - /* Didn't find anything */ \ - if (new) { \ - INIT; \ - new->MEMBER.next = *head; \ - *head = &new->MEMBER; \ - (DETAIL)->entries ++; \ - cache_get(&new->MEMBER); \ - if (set) { \ - tmp = new; \ - if (test_bit(CACHE_NEGATIVE, &item->MEMBER.flags)) \ - set_bit(CACHE_NEGATIVE, &tmp->MEMBER.flags); \ - else {UPDATE;} \ - } \ - } \ - if (set||new) write_unlock(&(DETAIL)->hash_lock); \ - else read_unlock(&(DETAIL)->hash_lock); \ - if (new && set) \ - cache_fresh(DETAIL, &new->MEMBER, item->MEMBER.expiry_time); \ - if (new) \ - return new; \ - new = kmalloc(sizeof(*new), GFP_KERNEL); \ - if (new) { \ - cache_init(&new->MEMBER); \ - goto retry; \ - } \ - return NULL; \ -} - -#define DefineSimpleCacheLookup(STRUCT, FUNC) \ - DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ - (struct STRUCT *item, int set), /*no setup */, \ - & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ - STRUCT##_init(new, item), STRUCT##_update(tmp, item)) extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, -- cgit v1.2.3 From ebd0cb1af3be2729cc1f574681dfba01fcf458d9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:08 -0800 Subject: [PATCH] knfsd: Unexport cache_fresh and fix a small race Cache_fresh is now only used in cache.c, so unexport it. Part of cache_fresh (setting CACHE_VALID) should really be done under the lock, while part (calling cache_revisit_request etc) must be done outside the lock. So we split it up appropriately. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/cache.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index afc481dd02d..a37fead1873 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -165,8 +165,6 @@ static inline int cache_put(struct cache_head *h, struct cache_detail *cd) } extern void cache_init(struct cache_head *h); -extern void cache_fresh(struct cache_detail *detail, - struct cache_head *head, time_t expiry); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); -- cgit v1.2.3 From baab935ff3bdac20c558809da0d8e8f761840219 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:15:09 -0800 Subject: [PATCH] knfsd: Convert sunrpc_cache to use krefs .. it makes some of the code nicer. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 4 +--- include/linux/sunrpc/cache.h | 13 ++++++------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index d52e0b7ad37..a6c08a47b25 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -102,13 +102,11 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern void expkey_put(struct cache_head *item, struct cache_detail *cd); -extern void svc_export_put(struct cache_head *item, struct cache_detail *cd); extern struct cache_detail svc_export_cache, svc_expkey_cache; static inline void exp_put(struct svc_export *exp) { - svc_export_put(&exp->h, &svc_export_cache); + cache_put(&exp->h, &svc_export_cache); } static inline void exp_get(struct svc_export *exp) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index a37fead1873..ad3f5cbdb77 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -50,7 +50,7 @@ struct cache_head { time_t last_refresh; /* If CACHE_PENDING, this is when upcall * was sent, else this is when update was received */ - atomic_t refcnt; + struct kref ref; unsigned long flags; }; #define CACHE_VALID 0 /* Entry contains valid data */ @@ -68,8 +68,7 @@ struct cache_detail { atomic_t inuse; /* active user-space update or lookup */ char *name; - void (*cache_put)(struct cache_head *, - struct cache_detail*); + void (*cache_put)(struct kref *); void (*cache_request)(struct cache_detail *cd, struct cache_head *h, @@ -151,17 +150,17 @@ extern void cache_clean_deferred(void *owner); static inline struct cache_head *cache_get(struct cache_head *h) { - atomic_inc(&h->refcnt); + kref_get(&h->ref); return h; } -static inline int cache_put(struct cache_head *h, struct cache_detail *cd) +static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->refcnt) <= 2 && + if (atomic_read(&h->ref.refcount) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; - return atomic_dec_and_test(&h->refcnt); + kref_put(&h->ref, cd->cache_put); } extern void cache_init(struct cache_head *h); -- cgit v1.2.3 From 74cae61ab45f19a3e8c4d9f53c0e94df129c7915 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Mar 2006 01:15:10 -0800 Subject: [PATCH] fs/nfsd/export.c,net/sunrpc/cache.c: make needlessly global code static We can now make some code static. Signed-off-by: Adrian Bunk Cc: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 5 +---- include/linux/sunrpc/cache.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index a6c08a47b25..d2a8abb5011 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -86,9 +86,6 @@ void nfsd_export_shutdown(void); void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); -struct svc_expkey * exp_find_key(struct auth_domain *clp, - int fsid_type, u32 *fsidv, - struct cache_req *reqp); struct svc_export * exp_get_by_name(struct auth_domain *clp, struct vfsmount *mnt, struct dentry *dentry, @@ -102,7 +99,7 @@ int exp_rootfh(struct auth_domain *, int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); int nfserrno(int errno); -extern struct cache_detail svc_export_cache, svc_expkey_cache; +extern struct cache_detail svc_export_cache; static inline void exp_put(struct svc_export *exp) { diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index ad3f5cbdb77..b5612c958cc 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -163,7 +163,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -extern void cache_init(struct cache_head *h); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); -- cgit v1.2.3 From 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 27 Mar 2006 01:15:22 -0800 Subject: [PATCH] sched: new sched domain for representing multi-core Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with last level cache shared between cores with in a package. If there are two runnable processes, with this appended patch those two processes will be scheduled on different packages. On such systems, with this patch we have observed 8% perf improvement with specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2 users). This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by domain degeneration code. This new domain can be also used for implementing power savings policy (see OLS 2005 CMP kernel scheduler paper for more details.. I will post another patch for power savings policy soon) Most of the arch/* file changes are for cpu_coregroup_map() implementation. Signed-off-by: Suresh Siddha Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index e8eb0040ce3..a305ae2e44b 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -164,6 +164,15 @@ .nr_balance_failed = 0, \ } +#ifdef CONFIG_SCHED_MC +#ifndef SD_MC_INIT +/* for now its same as SD_CPU_INIT. + * TBD: Tune Domain parameters! + */ +#define SD_MC_INIT SD_CPU_INIT +#endif +#endif + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! -- cgit v1.2.3 From a117e66ed45ac0569c039ea60bd7a9a61e031858 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:25 -0800 Subject: [PATCH] unify pfn_to_page: generic functions There are 3 memory models, FLATMEM, DISCONTIGMEM, SPARSEMEM. Each arch has its own page_to_pfn(), pfn_to_page() for each models. But most of them can use the same arithmetic. This patch adds asm-generic/memory_model.h, which includes generic page_to_pfn(), pfn_to_page() definitions for each memory model. When CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y, out-of-line functions are used instead of macro. This is enabled by some archs and reduces text size. Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Andi Kleen Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Ian Molton Cc: Mikael Starvik Cc: David Howells Cc: Yoshinori Sato Cc: Hirokazu Takata Cc: Ralf Baechle Cc: Kyle McMartin Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Chris Zankel Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ebfc238cc24..0c1c0c0cce6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -602,17 +602,6 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn) return __nr_to_section(pfn_to_section_nr(pfn)); } -#define pfn_to_page(pfn) \ -({ \ - unsigned long __pfn = (pfn); \ - __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ -}) -#define page_to_pfn(page) \ -({ \ - page - __section_mem_map_addr(__nr_to_section( \ - page_to_section(page))); \ -}) - static inline int pfn_valid(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) -- cgit v1.2.3 From a0140c1d85637ee5f4ea7c78f066e3611a6a79dc Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:55 -0800 Subject: [PATCH] remove zone_mem_map This patch removes zone_mem_map. pfn_to_page uses pgdat, page_to_pfn uses zone. page_to_pfn can use pgdat instead of zone, which is only one user of zone_mem_map. By modifing it, we can remove zone_mem_map. Signed-off-by: KAMEZAWA Hiroyuki Cc: Dave Hansen Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0c1c0c0cce6..ace31c515a8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -225,7 +225,6 @@ struct zone { * Discontig memory support fields. */ struct pglist_data *zone_pgdat; - struct page *zone_mem_map; /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; -- cgit v1.2.3 From 8357f8695d58b50fbf2bd507b4b0fc2cd1e43bd6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:57 -0800 Subject: [PATCH] define for_each_online_pgdat This patch defines for_each_online_pgdat() as a replacement of for_each_pgdat() Now, online nodes are managed by node_online_map. But for_each_pgdat() uses pgdat_link to iterate over all nodes(pgdat). This means management structure for online pgdat is duplicated. I think using node_online_map for for_each_pgdat() is simple and sane rather ather than pgdat_link. New macro is named as for_each_online_pgdat(). Following patch will fix callers of for_each_pgdat(). The bootmem allocater uses for_each_pgdat() before pgdat initialization. I don't think it's sane. Following patch will fix it. Signed-off-by: Yasunori Goto Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 108 +++++++++++++++++++++++++---------------------- include/linux/nodemask.h | 4 ++ 2 files changed, 61 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ace31c515a8..96eb0802509 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -13,6 +13,7 @@ #include #include #include +#include #include /* Free memory management - zoned buddy allocator. */ @@ -349,57 +350,6 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); */ #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) -/** - * for_each_pgdat - helper macro to iterate over all nodes - * @pgdat - pointer to a pg_data_t variable - * - * Meant to help with common loops of the form - * pgdat = pgdat_list; - * while(pgdat) { - * ... - * pgdat = pgdat->pgdat_next; - * } - */ -#define for_each_pgdat(pgdat) \ - for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else if (pgdat->pgdat_next) { - pgdat = pgdat->pgdat_next; - zone = pgdat->node_zones; - } else - zone = NULL; - - return zone; -} - -/** - * for_each_zone - helper macro to iterate over all memory zones - * @zone - pointer to struct zone variable - * - * The user only needs to declare the zone variable, for_each_zone - * fills it in. This basically means for_each_zone() is an - * easier to read version of this piece of code: - * - * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next) - * for (i = 0; i < MAX_NR_ZONES; ++i) { - * struct zone * z = pgdat->node_zones + i; - * ... - * } - * } - */ -#define for_each_zone(zone) \ - for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone)) - static inline int populated_zone(struct zone *zone) { return (!!zone->present_pages); @@ -471,6 +421,62 @@ extern struct pglist_data contig_page_data; #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline struct pglist_data *first_online_pgdat(void) +{ + return NODE_DATA(first_online_node); +} + +static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) +{ + int nid = next_online_node(pgdat->node_id); + + if (nid == MAX_NUMNODES) + return NULL; + return NODE_DATA(nid); +} + + +/** + * for_each_pgdat - helper macro to iterate over all nodes + * @pgdat - pointer to a pg_data_t variable + */ +#define for_each_online_pgdat(pgdat) \ + for (pgdat = first_online_pgdat(); \ + pgdat; \ + pgdat = next_online_pgdat(pgdat)) + +/* + * next_zone - helper magic for for_each_zone() + * Thanks to William Lee Irwin III for this piece of ingenuity. + */ +static inline struct zone *next_zone(struct zone *zone) +{ + pg_data_t *pgdat = zone->zone_pgdat; + + if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) + zone++; + else { + pgdat = next_online_pgdat(pgdat); + if (pgdat) + zone = pgdat->node_zones; + else + zone = NULL; + } + return zone; +} + +/** + * for_each_zone - helper macro to iterate over all memory zones + * @zone - pointer to struct zone variable + * + * The user only needs to declare the zone variable, for_each_zone + * fills it in. + */ +#define for_each_zone(zone) \ + for (zone = (first_online_pgdat())->node_zones; \ + zone; \ + zone = next_zone(zone)) + #ifdef CONFIG_SPARSEMEM #include #endif diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b959a4525cb..1a9ef3e627d 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -350,11 +350,15 @@ extern nodemask_t node_possible_map; #define num_possible_nodes() nodes_weight(node_possible_map) #define node_online(node) node_isset((node), node_online_map) #define node_possible(node) node_isset((node), node_possible_map) +#define first_online_node first_node(node_online_map) +#define next_online_node(nid) next_node((nid), node_online_map) #else #define num_online_nodes() 1 #define num_possible_nodes() 1 #define node_online(node) ((node) == 0) #define node_possible(node) ((node) == 0) +#define first_online_node 0 +#define next_online_node(nid) (MAX_NUMNODES) #endif #define any_online_node(mask) \ -- cgit v1.2.3 From 679bc9fbb508a0aac9539b2de747eb5849feb428 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:58 -0800 Subject: [PATCH] for_each_online_pgdat: for_each_bootmem Add a list_head to bootmem_data_t and make bootmems use it. bootmem list is sorted by node_boot_start. Only nodes against which init_bootmem() is called are linked to the list. (i386 allocates bootmem only from one node(0) not from all online nodes.) A summary: 1. for_each_online_pgdat() traverses all *online* nodes. 2. alloc_bootmem() allocates memory only from initialized-for-bootmem nodes. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 7155452fb4a..de3eb8d8ae2 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -38,6 +38,7 @@ typedef struct bootmem_data { unsigned long last_pos; unsigned long last_success; /* Previous allocation point. To speed * up searching */ + struct list_head list; } bootmem_data_t; extern unsigned long __init bootmem_bootmap_pages (unsigned long); -- cgit v1.2.3 From ae0f15fb91274e67d78836d38c99ec363df33073 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:01 -0800 Subject: [PATCH] for_each_online_pgdat: remove pgdat_list By using for_each_online_pgdat(), pgdat_list is not necessary now. This patch removes it. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 96eb0802509..0d12c3cf1f8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -307,7 +307,6 @@ typedef struct pglist_data { unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; - struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; struct task_struct *kswapd; int kswapd_max_order; @@ -324,8 +323,6 @@ typedef struct pglist_data { #include -extern struct pglist_data *pgdat_list; - void __get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free, struct pglist_data *pgdat); void get_zone_counts(unsigned long *active, unsigned long *inactive, -- cgit v1.2.3 From 95144c788dc01b6a0ff2c9c2222e37ffdab358b8 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:16:02 -0800 Subject: [PATCH] uninline zone helpers Helper functions for for_each_online_pgdat/for_each_zone look too big to be inlined. Speed of these helper macro itself is not very important. (inner loops are tend to do more work than this) This patch make helper function to be out-of-lined. inline out-of-line .text 005c0680 005bf6a0 005c0680 - 005bf6a0 = FE0 = 4Kbytes. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 38 +++----------------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0d12c3cf1f8..b5c21122c29 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -418,20 +418,9 @@ extern struct pglist_data contig_page_data; #endif /* !CONFIG_NEED_MULTIPLE_NODES */ -static inline struct pglist_data *first_online_pgdat(void) -{ - return NODE_DATA(first_online_node); -} - -static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) -{ - int nid = next_online_node(pgdat->node_id); - - if (nid == MAX_NUMNODES) - return NULL; - return NODE_DATA(nid); -} - +extern struct pglist_data *first_online_pgdat(void); +extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); +extern struct zone *next_zone(struct zone *zone); /** * for_each_pgdat - helper macro to iterate over all nodes @@ -441,27 +430,6 @@ static inline struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) for (pgdat = first_online_pgdat(); \ pgdat; \ pgdat = next_online_pgdat(pgdat)) - -/* - * next_zone - helper magic for for_each_zone() - * Thanks to William Lee Irwin III for this piece of ingenuity. - */ -static inline struct zone *next_zone(struct zone *zone) -{ - pg_data_t *pgdat = zone->zone_pgdat; - - if (zone < pgdat->node_zones + MAX_NR_ZONES - 1) - zone++; - else { - pgdat = next_online_pgdat(pgdat); - if (pgdat) - zone = pgdat->node_zones; - else - zone = NULL; - } - return zone; -} - /** * for_each_zone - helper macro to iterate over all memory zones * @zone - pointer to struct zone variable -- cgit v1.2.3 From 22a9835c350782a5c3257343713932af3ac92ee0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 27 Mar 2006 01:16:04 -0800 Subject: [PATCH] unify PFN_* macros Just about every architecture defines some macros to do operations on pfns. They're all virtually identical. This patch consolidates all of them. One minor glitch is that at least i386 uses them in a very skeletal header file. To keep away from #include dependency hell, I stuck the new definitions in a new, isolated header. Of all of the implementations, sh64 is the only one that varied by a bit. It used some masks to ensure that any sign-extension got ripped away before the arithmetic is done. This has been posted to that sh64 maintainers and the development list. Compiles on x86, x86_64, ia64 and ppc64. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pfn.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/linux/pfn.h (limited to 'include/linux') diff --git a/include/linux/pfn.h b/include/linux/pfn.h new file mode 100644 index 00000000000..bb01f8b92b5 --- /dev/null +++ b/include/linux/pfn.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_PFN_H_ +#define _LINUX_PFN_H_ + +#define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK) +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + +#endif -- cgit v1.2.3 From 0771dfefc9e538f077d0b43b6dec19a5a67d0e70 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:22 -0800 Subject: [PATCH] lightweight robust futexes: core Add the core infrastructure for robust futexes: structure definitions, the new syscalls and the do_exit() based cleanup mechanism. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Acked-by: Ulrich Drepper Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 95 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 3 ++ include/linux/threads.h | 3 +- 3 files changed, 100 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 10f96c31971..20face6b798 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -1,6 +1,8 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include + /* Second argument to futex syscall */ @@ -11,10 +13,103 @@ #define FUTEX_CMP_REQUEUE 4 #define FUTEX_WAKE_OP 5 +/* + * Support for robust futexes: the kernel cleans up held futexes at + * thread exit time. + */ + +/* + * Per-lock list entry - embedded in user-space locks, somewhere close + * to the futex field. (Note: user-space uses a double-linked list to + * achieve O(1) list add and remove, but the kernel only needs to know + * about the forward link) + * + * NOTE: this structure is part of the syscall ABI, and must not be + * changed. + */ +struct robust_list { + struct robust_list __user *next; +}; + +/* + * Per-thread list head: + * + * NOTE: this structure is part of the syscall ABI, and must only be + * changed if the change is first communicated with the glibc folks. + * (When an incompatible change is done, we'll increase the structure + * size, which glibc will detect) + */ +struct robust_list_head { + /* + * The head of the list. Points back to itself if empty: + */ + struct robust_list list; + + /* + * This relative offset is set by user-space, it gives the kernel + * the relative position of the futex field to examine. This way + * we keep userspace flexible, to freely shape its data-structure, + * without hardcoding any particular offset into the kernel: + */ + long futex_offset; + + /* + * The death of the thread may race with userspace setting + * up a lock's links. So to handle this race, userspace first + * sets this field to the address of the to-be-taken lock, + * then does the lock acquire, and then adds itself to the + * list, and then clears this field. Hence the kernel will + * always have full knowledge of all locks that the thread + * _might_ have taken. We check the owner TID in any case, + * so only truly owned locks will be handled. + */ + struct robust_list __user *list_op_pending; +}; + +/* + * Are there any waiters for this robust futex: + */ +#define FUTEX_WAITERS 0x80000000 + +/* + * The kernel signals via this bit that a thread holding a futex + * has exited without unlocking the futex. The kernel also does + * a FUTEX_WAKE on such futexes, after setting the bit, to wake + * up any possible waiters: + */ +#define FUTEX_OWNER_DIED 0x40000000 + +/* + * Reserved bit: + */ +#define FUTEX_OWNER_PENDING 0x20000000 + +/* + * The rest of the robust-futex field is for the TID: + */ +#define FUTEX_TID_MASK 0x1fffffff + +/* + * A limit of one million locks held per thread (!) ought to be enough + * for some time. This also protects against a deliberately circular + * list. Not worth introducing an rlimit for this: + */ +#define ROBUST_LIST_LIMIT 1048576 + long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); +extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr); + +#ifdef CONFIG_FUTEX +extern void exit_robust_list(struct task_struct *curr); +#else +static inline void exit_robust_list(struct task_struct *curr) +{ +} +#endif + #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ #define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ #define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 036d14d2bf9..fd4848f2d75 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ #include #include #include +#include #include /* For AT_VECTOR_SIZE */ @@ -872,6 +873,8 @@ struct task_struct { int cpuset_mems_generation; int cpuset_mem_spread_rotor; #endif + struct robust_list_head __user *robust_list; + atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; }; diff --git a/include/linux/threads.h b/include/linux/threads.h index b59738ac619..e646bcdf261 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -28,7 +28,8 @@ #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) /* - * A maximum of 4 million PIDs should be enough for a while: + * A maximum of 4 million PIDs should be enough for a while. + * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.] */ #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) -- cgit v1.2.3 From 34f192c6527f20c47ccec239e7d51a27691b93fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:24 -0800 Subject: [PATCH] lightweight robust futexes: compat 32-bit syscall compatibility support. (This patch also moves all futex related compat functionality into kernel/futex_compat.c.) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Acked-by: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 18 ++++++++++++++++++ include/linux/sched.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 24d659cdbaf..6d3a654be1a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -147,6 +147,24 @@ typedef struct compat_sigevent { } _sigev_un; } compat_sigevent_t; +struct compat_robust_list { + compat_uptr_t next; +}; + +struct compat_robust_list_head { + struct compat_robust_list list; + compat_long_t futex_offset; + compat_uptr_t list_op_pending; +}; + +extern void compat_exit_robust_list(struct task_struct *curr); + +asmlinkage long +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len); +asmlinkage long +compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, + compat_size_t __user *len_ptr); long compat_sys_semctl(int first, int second, int third, void __user *uptr); long compat_sys_msgsnd(int first, int second, int third, void __user *uptr); diff --git a/include/linux/sched.h b/include/linux/sched.h index fd4848f2d75..20b4f0372e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -874,6 +874,9 @@ struct task_struct { int cpuset_mem_spread_rotor; #endif struct robust_list_head __user *robust_list; +#ifdef CONFIG_COMPAT + struct compat_robust_list_head __user *compat_robust_list; +#endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; -- cgit v1.2.3 From 8f17d3a5049d32392b79925c73a0cf99ce6d5af0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:27 -0800 Subject: [PATCH] lightweight robust futexes updates - fix: initialize the robust list(s) to NULL in copy_process. - doc update - cleanup: rename _inuser to _inatomic - __user cleanups and other small cleanups Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Cc: Arjan van de Ven Cc: Ulrich Drepper Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 20face6b798..55fff96ae85 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -100,7 +100,7 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); -extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr); +extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr); #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); -- cgit v1.2.3 From 76b81e2b0e2241accebcc68e126bc5ab958661b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:28 -0800 Subject: [PATCH] lightweight robust futexes updates 2 futex.h updates: - get rid of FUTEX_OWNER_PENDING - it's not used - reduce ROBUST_LIST_LIMIT to a saner value Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 55fff96ae85..966a5b3da43 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -79,22 +79,16 @@ struct robust_list_head { */ #define FUTEX_OWNER_DIED 0x40000000 -/* - * Reserved bit: - */ -#define FUTEX_OWNER_PENDING 0x20000000 - /* * The rest of the robust-futex field is for the TID: */ -#define FUTEX_TID_MASK 0x1fffffff +#define FUTEX_TID_MASK 0x3fffffff /* - * A limit of one million locks held per thread (!) ought to be enough - * for some time. This also protects against a deliberately circular - * list. Not worth introducing an rlimit for this: + * This limit protects against a deliberately circular list. + * (Not worth introducing an rlimit for it) */ -#define ROBUST_LIST_LIMIT 1048576 +#define ROBUST_LIST_LIMIT 2048 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, -- cgit v1.2.3 From e041c683412d5bf44dc2b109053e3b837b71742d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 27 Mar 2006 01:16:30 -0800 Subject: [PATCH] Notifier chain update: API changes The kernel's implementation of notifier chains is unsafe. There is no protection against entries being added to or removed from a chain while the chain is in use. The issues were discussed in this thread: http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 We noticed that notifier chains in the kernel fall into two basic usage classes: "Blocking" chains are always called from a process context and the callout routines are allowed to sleep; "Atomic" chains can be called from an atomic context and the callout routines are not allowed to sleep. We decided to codify this distinction and make it part of the API. Therefore this set of patches introduces three new, parallel APIs: one for blocking notifiers, one for atomic notifiers, and one for "raw" notifiers (which is really just the old API under a new name). New kinds of data structures are used for the heads of the chains, and new routines are defined for registration, unregistration, and calling a chain. The three APIs are explained in include/linux/notifier.h and their implementation is in kernel/sys.c. With atomic and blocking chains, the implementation guarantees that the chain links will not be corrupted and that chain callers will not get messed up by entries being added or removed. For raw chains the implementation provides no guarantees at all; users of this API must provide their own protections. (The idea was that situations may come up where the assumptions of the atomic and blocking APIs are not appropriate, so it should be possible for users to handle these things in their own way.) There are some limitations, which should not be too hard to live with. For atomic/blocking chains, registration and unregistration must always be done in a process context since the chain is protected by a mutex/rwsem. Also, a callout routine for a non-raw chain must not try to register or unregister entries on its own chain. (This did happen in a couple of places and the code had to be changed to avoid it.) Since atomic chains may be called from within an NMI handler, they cannot use spinlocks for synchronization. Instead we use RCU. The overhead falls almost entirely in the unregister routine, which is okay since unregistration is much less frequent that calling a chain. Here is the list of chains that we adjusted and their classifications. None of them use the raw API, so for the moment it is only a placeholder. ATOMIC CHAINS ------------- arch/i386/kernel/traps.c: i386die_chain arch/ia64/kernel/traps.c: ia64die_chain arch/powerpc/kernel/traps.c: powerpc_die_chain arch/sparc64/kernel/traps.c: sparc64die_chain arch/x86_64/kernel/traps.c: die_chain drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list kernel/panic.c: panic_notifier_list kernel/profile.c: task_free_notifier net/bluetooth/hci_core.c: hci_notifier net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain net/ipv6/addrconf.c: inet6addr_chain net/netfilter/nf_conntrack_core.c: nf_conntrack_chain net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain net/netlink/af_netlink.c: netlink_chain BLOCKING CHAINS --------------- arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain arch/s390/kernel/process.c: idle_chain arch/x86_64/kernel/process.c idle_notifier drivers/base/memory.c: memory_chain drivers/cpufreq/cpufreq.c cpufreq_policy_notifier_list drivers/cpufreq/cpufreq.c cpufreq_transition_notifier_list drivers/macintosh/adb.c: adb_client_list drivers/macintosh/via-pmu.c sleep_notifier_list drivers/macintosh/via-pmu68k.c sleep_notifier_list drivers/macintosh/windfarm_core.c wf_client_list drivers/usb/core/notify.c usb_notifier_list drivers/video/fbmem.c fb_notifier_list kernel/cpu.c cpu_chain kernel/module.c module_notify_list kernel/profile.c munmap_notifier kernel/profile.c task_exit_notifier kernel/sys.c reboot_notifier_list net/core/dev.c netdev_chain net/decnet/dn_dev.c: dnaddr_chain net/ipv4/devinet.c: inetaddr_chain It's possible that some of these classifications are wrong. If they are, please let us know or submit a patch to fix them. Note that any chain that gets called very frequently should be atomic, because the rwsem read-locking used for blocking chains is very likely to incur cache misses on SMP systems. (However, if the chain's callout routines may sleep then the chain cannot be atomic.) The patch set was written by Alan Stern and Chandra Seetharaman, incorporating material written by Keith Owens and suggestions from Paul McKenney and Andrew Morton. [jes@sgi.com: restructure the notifier chain initialization macros] Signed-off-by: Alan Stern Signed-off-by: Chandra Seetharaman Signed-off-by: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/adb.h | 2 +- include/linux/kernel.h | 2 +- include/linux/memory.h | 1 - include/linux/netfilter_ipv4/ip_conntrack.h | 17 ++--- include/linux/notifier.h | 96 ++++++++++++++++++++++++++--- 5 files changed, 100 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/adb.h b/include/linux/adb.h index e9fdc63483c..b7305b17827 100644 --- a/include/linux/adb.h +++ b/include/linux/adb.h @@ -85,7 +85,7 @@ enum adb_message { ADB_MSG_POST_RESET /* Called after resetting the bus (re-do init & register) */ }; extern struct adb_driver *adb_controller; -extern struct notifier_block *adb_client_list; +extern struct blocking_notifier_head adb_client_list; int adb_request(struct adb_request *req, void (*done)(struct adb_request *), int flags, int nbytes, ...); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 03d6cfaa5b8..a3720f973ea 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -87,7 +87,7 @@ extern int cond_resched(void); (__x < 0) ? -__x : __x; \ }) -extern struct notifier_block *panic_notifier_list; +extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) __attribute__ ((NORET_AND format (printf, 1, 2))); diff --git a/include/linux/memory.h b/include/linux/memory.h index e251dc43d0f..8f04143ca36 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -77,7 +77,6 @@ extern int remove_memory_block(unsigned long, struct mem_section *, int); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< +#include +#include -struct notifier_block -{ - int (*notifier_call)(struct notifier_block *self, unsigned long, void *); +/* + * Notifier chains are of three types: + * + * Atomic notifier chains: Chain callbacks run in interrupt/atomic + * context. Callouts are not allowed to block. + * Blocking notifier chains: Chain callbacks run in process context. + * Callouts are allowed to block. + * Raw notifier chains: There are no restrictions on callbacks, + * registration, or unregistration. All locking and protection + * must be provided by the caller. + * + * atomic_notifier_chain_register() may be called from an atomic context, + * but blocking_notifier_chain_register() must be called from a process + * context. Ditto for the corresponding _unregister() routines. + * + * atomic_notifier_chain_unregister() and blocking_notifier_chain_unregister() + * _must not_ be called from within the call chain. + */ + +struct notifier_block { + int (*notifier_call)(struct notifier_block *, unsigned long, void *); struct notifier_block *next; int priority; }; +struct atomic_notifier_head { + spinlock_t lock; + struct notifier_block *head; +}; + +struct blocking_notifier_head { + struct rw_semaphore rwsem; + struct notifier_block *head; +}; + +struct raw_notifier_head { + struct notifier_block *head; +}; + +#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ + spin_lock_init(&(name)->lock); \ + (name)->head = NULL; \ + } while (0) +#define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ + init_rwsem(&(name)->rwsem); \ + (name)->head = NULL; \ + } while (0) +#define RAW_INIT_NOTIFIER_HEAD(name) do { \ + (name)->head = NULL; \ + } while (0) + +#define ATOMIC_NOTIFIER_INIT(name) { \ + .lock = SPIN_LOCK_UNLOCKED, \ + .head = NULL } +#define BLOCKING_NOTIFIER_INIT(name) { \ + .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ + .head = NULL } +#define RAW_NOTIFIER_INIT(name) { \ + .head = NULL } + +#define ATOMIC_NOTIFIER_HEAD(name) \ + struct atomic_notifier_head name = \ + ATOMIC_NOTIFIER_INIT(name) +#define BLOCKING_NOTIFIER_HEAD(name) \ + struct blocking_notifier_head name = \ + BLOCKING_NOTIFIER_INIT(name) +#define RAW_NOTIFIER_HEAD(name) \ + struct raw_notifier_head name = \ + RAW_NOTIFIER_INIT(name) #ifdef __KERNEL__ -extern int notifier_chain_register(struct notifier_block **list, struct notifier_block *n); -extern int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n); -extern int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v); +extern int atomic_notifier_chain_register(struct atomic_notifier_head *, + struct notifier_block *); +extern int blocking_notifier_chain_register(struct blocking_notifier_head *, + struct notifier_block *); +extern int raw_notifier_chain_register(struct raw_notifier_head *, + struct notifier_block *); + +extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *, + struct notifier_block *); +extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *, + struct notifier_block *); +extern int raw_notifier_chain_unregister(struct raw_notifier_head *, + struct notifier_block *); + +extern int atomic_notifier_call_chain(struct atomic_notifier_head *, + unsigned long val, void *v); +extern int blocking_notifier_call_chain(struct blocking_notifier_head *, + unsigned long val, void *v); +extern int raw_notifier_call_chain(struct raw_notifier_head *, + unsigned long val, void *v); #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ -#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) /* Bad/Veto action */ +#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) + /* Bad/Veto action */ /* * Clean way to return from the notifier and stop further calls. */ -- cgit v1.2.3 From c58411e95d7f5062dedd1a3064af4d359da1e633 Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Mon, 27 Mar 2006 01:16:34 -0800 Subject: [PATCH] RTC Subsystem: library functions RTC and date/time related functions. Signed-off-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rtc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b739ac1f7ca..8454337c705 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -95,6 +95,11 @@ struct rtc_pll_info { #include +extern int rtc_month_days(unsigned int month, unsigned int year); +extern int rtc_valid_tm(struct rtc_time *tm); +extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time); +extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm); + typedef struct rtc_task { void (*func)(void *private_data); void *private_data; -- cgit v1.2.3 From 0c86edc0d4970649f39748c4ce4f2895f728468f Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Mon, 27 Mar 2006 01:16:37 -0800 Subject: [PATCH] RTC subsystem: class Add the basic RTC subsystem infrastructure to the kernel. rtc/class.c - registration facilities for RTC drivers rtc/interface.c - kernel/rtc interface functions rtc/hctosys.c - snippet of code that copies hw clock to sw clock at bootup, if configured to do so. Signed-off-by: Alessandro Zummo Acked-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rtc.h | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 8454337c705..ab61cd1199f 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -91,6 +91,12 @@ struct rtc_pll_info { #define RTC_PLL_GET _IOR('p', 0x11, struct rtc_pll_info) /* Get PLL correction */ #define RTC_PLL_SET _IOW('p', 0x12, struct rtc_pll_info) /* Set PLL correction */ +/* interrupt flags */ +#define RTC_IRQF 0x80 /* any of the following is active */ +#define RTC_PF 0x40 +#define RTC_AF 0x20 +#define RTC_UF 0x10 + #ifdef __KERNEL__ #include @@ -100,6 +106,87 @@ extern int rtc_valid_tm(struct rtc_time *tm); extern int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time); extern void rtc_time_to_tm(unsigned long time, struct rtc_time *tm); +#include +#include +#include +#include +#include + +extern struct class *rtc_class; + +struct rtc_class_ops { + int (*open)(struct device *); + void (*release)(struct device *); + int (*ioctl)(struct device *, unsigned int, unsigned long); + int (*read_time)(struct device *, struct rtc_time *); + int (*set_time)(struct device *, struct rtc_time *); + int (*read_alarm)(struct device *, struct rtc_wkalrm *); + int (*set_alarm)(struct device *, struct rtc_wkalrm *); + int (*proc)(struct device *, struct seq_file *); + int (*set_mmss)(struct device *, unsigned long secs); + int (*irq_set_state)(struct device *, int enabled); + int (*irq_set_freq)(struct device *, int freq); + int (*read_callback)(struct device *, int data); +}; + +#define RTC_DEVICE_NAME_SIZE 20 +struct rtc_task; + +struct rtc_device +{ + struct class_device class_dev; + struct module *owner; + + int id; + char name[RTC_DEVICE_NAME_SIZE]; + + struct rtc_class_ops *ops; + struct mutex ops_lock; + + struct class_device *rtc_dev; + struct cdev char_dev; + struct mutex char_lock; + + unsigned long irq_data; + spinlock_t irq_lock; + wait_queue_head_t irq_queue; + struct fasync_struct *async_queue; + + struct rtc_task *irq_task; + spinlock_t irq_task_lock; + int irq_freq; +}; +#define to_rtc_device(d) container_of(d, struct rtc_device, class_dev) + +extern struct rtc_device *rtc_device_register(const char *name, + struct device *dev, + struct rtc_class_ops *ops, + struct module *owner); +extern void rtc_device_unregister(struct rtc_device *rdev); +extern int rtc_interface_register(struct class_interface *intf); + +extern int rtc_read_time(struct class_device *class_dev, struct rtc_time *tm); +extern int rtc_set_time(struct class_device *class_dev, struct rtc_time *tm); +extern int rtc_set_mmss(struct class_device *class_dev, unsigned long secs); +extern int rtc_read_alarm(struct class_device *class_dev, + struct rtc_wkalrm *alrm); +extern int rtc_set_alarm(struct class_device *class_dev, + struct rtc_wkalrm *alrm); +extern void rtc_update_irq(struct class_device *class_dev, + unsigned long num, unsigned long events); + +extern struct class_device *rtc_class_open(char *name); +extern void rtc_class_close(struct class_device *class_dev); + +extern int rtc_irq_register(struct class_device *class_dev, + struct rtc_task *task); +extern void rtc_irq_unregister(struct class_device *class_dev, + struct rtc_task *task); +extern int rtc_irq_set_state(struct class_device *class_dev, + struct rtc_task *task, int enabled); +extern int rtc_irq_set_freq(struct class_device *class_dev, + struct rtc_task *task, int freq); + typedef struct rtc_task { void (*func)(void *private_data); void *private_data; -- cgit v1.2.3 From 6fc7f10cee28c7fa190920fefda8c696d5bf3074 Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Mon, 27 Mar 2006 01:16:37 -0800 Subject: [PATCH] RTC subsystem: I2C cleanup This patch, completely optional, removes from drivers/i2c/chips all the drivers that are implemented in the new RTC subsystem. It should be noted that none of the current driver is actually integrated, i.e. usable without further patches. Signed-off-by: Alessandro Zummo Acked-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/x1205.h | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 include/linux/x1205.h (limited to 'include/linux') diff --git a/include/linux/x1205.h b/include/linux/x1205.h deleted file mode 100644 index 64fd3af894a..00000000000 --- a/include/linux/x1205.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * x1205.h - defines for drivers/i2c/chips/x1205.c - * Copyright 2004 Karen Spearel - * Copyright 2005 Alessandro Zummo - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __LINUX_X1205_H__ -#define __LINUX_X1205_H__ - -/* commands */ - -#define X1205_CMD_GETDATETIME 0 -#define X1205_CMD_SETTIME 1 -#define X1205_CMD_SETDATETIME 2 -#define X1205_CMD_GETALARM 3 -#define X1205_CMD_SETALARM 4 -#define X1205_CMD_GETDTRIM 5 -#define X1205_CMD_SETDTRIM 6 -#define X1205_CMD_GETATRIM 7 -#define X1205_CMD_SETATRIM 8 - -extern int x1205_do_command(unsigned int cmd, void *arg); -extern int x1205_direct_attach(int adapter_id, - struct i2c_client_address_data *address_data); - -#endif /* __LINUX_X1205_H__ */ -- cgit v1.2.3 From f7f3682fb2f8bc8a9c912baeea15454416ca1972 Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Mon, 27 Mar 2006 01:16:38 -0800 Subject: [PATCH] RTC subsystem: I2C driver ids This patch adds the I2C driver ids to i2c-id.h in preparation of the I2C direct probing method. This is kept separate so that it can be integrated to Signed-off-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c-id.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 679b46a6a56..c8b81f419fd 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -108,6 +108,10 @@ #define I2C_DRIVERID_UPD64083 78 /* upd64083 video processor */ #define I2C_DRIVERID_UPD64031A 79 /* upd64031a video processor */ #define I2C_DRIVERID_SAA717X 80 /* saa717x video encoder */ +#define I2C_DRIVERID_DS1672 81 /* Dallas/Maxim DS1672 RTC */ +#define I2C_DRIVERID_X1205 82 /* Xicor/Intersil X1205 RTC */ +#define I2C_DRIVERID_PCF8563 83 /* Philips PCF8563 RTC */ +#define I2C_DRIVERID_RS5C372 84 /* Ricoh RS5C372 RTC */ #define I2C_DRIVERID_I2CDEV 900 #define I2C_DRIVERID_ARP 902 /* SMBus ARP Client */ -- cgit v1.2.3 From 1d98af87270cc08bb8251e004b9dc63cc838f24b Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Mon, 27 Mar 2006 01:16:47 -0800 Subject: [PATCH] RTC subsystem: M48T86 driver Add a driver for the ST M48T86 / Dallas DS12887 RTC. This is a platform driver. The platform device must provide I/O routines to access the RTC. Signed-off-by: Alessandro Zummo Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/m48t86.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/m48t86.h (limited to 'include/linux') diff --git a/include/linux/m48t86.h b/include/linux/m48t86.h new file mode 100644 index 00000000000..9065199319d --- /dev/null +++ b/include/linux/m48t86.h @@ -0,0 +1,16 @@ +/* + * ST M48T86 / Dallas DS12887 RTC driver + * Copyright (c) 2006 Tower Technologies + * + * Author: Alessandro Zummo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +struct m48t86_ops +{ + void (*writeb)(unsigned char value, unsigned long addr); + unsigned char (*readb)(unsigned long addr); +}; -- cgit v1.2.3 From ed49843b897da9969e349c279ffc832efcb93213 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Mon, 27 Mar 2006 01:17:36 -0800 Subject: [PATCH] Add ID for Quadro NVS280 Quadro NVS280 is a dual-head PCIe card with PCI ID 10de:00fd and subsystem ID 10de:0215. Signed-off-by: Pavel Roskin Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6f080ae5928..02f6cf20b14 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1052,6 +1052,7 @@ #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT2 0x00f2 #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6200_ALT1 0x00f3 #define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_GT 0x00f9 +#define PCIE_DEVICE_ID_NVIDIA_QUADRO_NVS280 0x00fd #define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR 0x0100 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR 0x0101 #define PCI_DEVICE_ID_NVIDIA_QUADRO 0x0103 -- cgit v1.2.3 From 969429b504ae866d3f8b1cafd68a2c099e305093 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:17:49 -0800 Subject: [PATCH] dm: make sure QUEUE_FLAG_CLUSTER is set properly This flag should be set for a virtual device iff it is set for all underlying devices. Signed-off-by: Neil Brown Acked-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 51e0e95a421..aee10b2ea4c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -97,6 +97,7 @@ struct io_restrictions { unsigned short hardsect_size; unsigned int max_segment_size; unsigned long seg_boundary_mask; + unsigned char no_cluster; /* inverted so that 0 is default */ }; struct dm_target { -- cgit v1.2.3 From 3ac51e741a46af7a20f55e79d3e3aeaa93c6c544 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 27 Mar 2006 01:17:54 -0800 Subject: [PATCH] dm store geometry Allow drive geometry to be stored with a new DM_DEV_SET_GEOMETRY ioctl. Device-mapper will now respond to HDIO_GETGEO. If the geometry information is not available, zero will be returned for all of the parameters. Signed-off-by: Darrick J. Wong Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat_ioctl.h | 2 ++ include/linux/dm-ioctl.h | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index efb518f16bb..89ab677cb99 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -140,6 +140,7 @@ COMPATIBLE_IOCTL(DM_TABLE_DEPS_32) COMPATIBLE_IOCTL(DM_TABLE_STATUS_32) COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32) COMPATIBLE_IOCTL(DM_TARGET_MSG_32) +COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY_32) COMPATIBLE_IOCTL(DM_VERSION) COMPATIBLE_IOCTL(DM_REMOVE_ALL) COMPATIBLE_IOCTL(DM_LIST_DEVICES) @@ -155,6 +156,7 @@ COMPATIBLE_IOCTL(DM_TABLE_DEPS) COMPATIBLE_IOCTL(DM_TABLE_STATUS) COMPATIBLE_IOCTL(DM_LIST_VERSIONS) COMPATIBLE_IOCTL(DM_TARGET_MSG) +COMPATIBLE_IOCTL(DM_DEV_SET_GEOMETRY) /* Big K */ COMPATIBLE_IOCTL(PIO_FONT) COMPATIBLE_IOCTL(GIO_FONT) diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index fa75ba0d635..c67c6786612 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -80,6 +80,16 @@ * * DM_TARGET_MSG: * Pass a message string to the target at a specific offset of a device. + * + * DM_DEV_SET_GEOMETRY: + * Set the geometry of a device by passing in a string in this format: + * + * "cylinders heads sectors_per_track start_sector" + * + * Beware that CHS geometry is nearly obsolete and only provided + * for compatibility with dm devices that can be booted by a PC + * BIOS. See struct hd_geometry for range limits. Also note that + * the geometry is erased if the device size changes. */ /* @@ -218,6 +228,7 @@ enum { /* Added later */ DM_LIST_VERSIONS_CMD, DM_TARGET_MSG_CMD, + DM_DEV_SET_GEOMETRY_CMD }; /* @@ -247,6 +258,7 @@ typedef char ioctl_struct[308]; #define DM_TABLE_STATUS_32 _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, ioctl_struct) #define DM_LIST_VERSIONS_32 _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, ioctl_struct) #define DM_TARGET_MSG_32 _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, ioctl_struct) +#define DM_DEV_SET_GEOMETRY_32 _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, ioctl_struct) #endif #define DM_IOCTL 0xfd @@ -270,11 +282,12 @@ typedef char ioctl_struct[308]; #define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl) #define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) +#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 5 +#define DM_VERSION_MINOR 6 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2005-10-04)" +#define DM_VERSION_EXTRA "-ioctl (2006-02-17)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 6a4d44c1f1108d6c9e8850e8cf166aaba0e56eae Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:17:55 -0800 Subject: [PATCH] dm/md dependency tree in sysfs: holders/slaves subdirectory Creating "slaves" and "holders" directories in /sys/block/ and creating "holders" directory under /sys/block// Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genhd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd647fde5ec..eea61cc8fac 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -78,6 +78,9 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; struct kobject kobj; +#ifdef CONFIG_SYSFS + struct kobject *holder_dir; +#endif unsigned ios[2], sectors[2]; /* READs and WRITEs */ int policy, partno; }; @@ -114,6 +117,10 @@ struct gendisk { int number; /* more of the same */ struct device *driverfs_dev; struct kobject kobj; +#ifdef CONFIG_SYSFS + struct kobject *holder_dir; + struct kobject *slave_dir; +#endif struct timer_rand_state *random; int policy; -- cgit v1.2.3 From 100873687d81d4ce7b1299b447d33e87ba1e9583 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 27 Mar 2006 01:17:56 -0800 Subject: [PATCH] dm-md-dependency-tree-in-sysfs-holders-slaves-subdirectory-tidy Remove all the CONFIG_SYSFS stuff. That's supposed to all be implemented up in header files. Yes, the CONFIG_SYSFS=n data structures will be a little larger than necessary, but that's a tradeoff we can decide to make. Cc: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genhd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index eea61cc8fac..bd7db861041 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -78,9 +78,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; struct kobject kobj; -#ifdef CONFIG_SYSFS struct kobject *holder_dir; -#endif unsigned ios[2], sectors[2]; /* READs and WRITEs */ int policy, partno; }; @@ -117,10 +115,8 @@ struct gendisk { int number; /* more of the same */ struct device *driverfs_dev; struct kobject kobj; -#ifdef CONFIG_SYSFS struct kobject *holder_dir; struct kobject *slave_dir; -#endif struct timer_rand_state *random; int policy; -- cgit v1.2.3 From 641dc636b0475582e48584340b774bd1e90d40d9 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Mon, 27 Mar 2006 01:17:57 -0800 Subject: [PATCH] dm/md dependency tree in sysfs: bd_claim_by_kobject Adding bd_claim_by_kobject() function which takes kobject as additional signature of holder device and creates sysfs symlinks between holder device and claimed device. bd_release_from_kobject() is a counterpart of bd_claim_by_kobject. Signed-off-by: Jun'ichi Nomura Cc: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9d967494695..680d913350e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -410,6 +410,9 @@ struct block_device { struct list_head bd_inodes; void * bd_holder; int bd_holders; +#ifdef CONFIG_SYSFS + struct list_head bd_holder_list; +#endif struct block_device * bd_contains; unsigned bd_block_size; struct hd_struct * bd_part; @@ -1399,6 +1402,13 @@ extern int blkdev_get(struct block_device *, mode_t, unsigned); extern int blkdev_put(struct block_device *); extern int bd_claim(struct block_device *, void *); extern void bd_release(struct block_device *); +#ifdef CONFIG_SYSFS +extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); +extern void bd_release_from_disk(struct block_device *, struct gendisk *); +#else +#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) +#define bd_release_from_disk(bdev, disk) bd_release(bdev) +#endif /* fs/char_dev.c */ extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); -- cgit v1.2.3 From b55e6bfcd23cb2f7249095050c649f7aea813f9f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:06 -0800 Subject: [PATCH] md: Split disks array out of raid5 conf structure so it is easier to grow The remainder of this batch implements raid5 reshaping. Currently the only shape change that is supported is added a device, but it is envisioned that changing the chunksize and layout will also be supported, as well as changing the level (e.g. 1->5, 5->6). The reshape process naturally has to move all of the data in the array, and so should be used with caution. It is believed to work, and some testing does support this, but wider testing would be great for increasing my confidence. You will need a version of mdadm newer than 2.3.1 to make use of raid5 growth. This is because mdadm need to take a copy of a 'critical section' at the start of the array incase there is a crash at an awkward moment. On restart, mdadm will restore the critical section and allow reshape to continue. I hope to release a 2.4-pre by early next week - it still needs a little more polishing. This patch: Previously the array of disk information was included in the raid5 'conf' structure which was allocated to an appropriate size. This makes it awkward to change the size of that array. So we split it off into a separate kmalloced array which will require a little extra indexing, but is much easier to grow. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/raid5.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 394da8207b3..94dbdd406f1 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -240,7 +240,7 @@ struct raid5_private_data { * waiting for 25% to be free */ spinlock_t device_lock; - struct disk_info disks[0]; + struct disk_info *disks; }; typedef struct raid5_private_data raid5_conf_t; -- cgit v1.2.3 From ad01c9e3752f4ba4f3d99c89b7370fa4983a25b5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:07 -0800 Subject: [PATCH] md: Allow stripes to be expanded in preparation for expanding an array Before a RAID-5 can be expanded, we need to be able to expand the stripe-cache data structure. This requires allocating new stripes in a new kmem_cache. If this succeeds, we copy cache pages over and release the old stripes and kmem_cache. We then allocate new pages. If that fails, we leave the stripe cache at it's new size. It isn't worth the effort to shrink it back again. Unfortuanately this means we need two kmem_cache names as we, for a short period of time, we have two kmem_caches. So they are raid5/%s and raid5/%s-alt Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/raid5.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 94dbdd406f1..b7b2653af7b 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -216,7 +216,11 @@ struct raid5_private_data { struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ atomic_t preread_active_stripes; /* stripes with scheduled io */ - char cache_name[20]; + /* unfortunately we need two cache names as we temporarily have + * two caches. + */ + int active_name; + char cache_name[2][20]; kmem_cache_t *slab_cache; /* for allocating stripes */ int seq_flush, seq_write; @@ -238,7 +242,8 @@ struct raid5_private_data { wait_queue_head_t wait_for_overlap; int inactive_blocked; /* release of inactive stripes blocked, * waiting for 25% to be free - */ + */ + int pool_size; /* number of disks in stripeheads in pool */ spinlock_t device_lock; struct disk_info *disks; }; -- cgit v1.2.3 From 7ecaa1e6a1ad69862e9980b6c777e11f26c4782d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:08 -0800 Subject: [PATCH] md: Infrastructure to allow normal IO to continue while array is expanding We need to allow that different stripes are of different effective sizes, and use the appropriate size. Also, when a stripe is being expanded, we must block any IO attempts until the stripe is stable again. Key elements in this change are: - each stripe_head gets a 'disk' field which is part of the key, thus there can sometimes be two stripe heads of the same area of the array, but covering different numbers of devices. One of these will be marked STRIPE_EXPANDING and so won't accept new requests. - conf->expand_progress tracks how the expansion is progressing and is used to determine whether the target part of the array has been expanded yet or not. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/raid5.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index b7b2653af7b..6fa274aea2a 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -135,6 +135,7 @@ struct stripe_head { atomic_t count; /* nr of active thread/requests */ spinlock_t lock; int bm_seq; /* sequence number for bitmap flushes */ + int disks; /* disks in stripe */ struct r5dev { struct bio req; struct bio_vec vec; @@ -174,6 +175,7 @@ struct stripe_head { #define STRIPE_DELAYED 6 #define STRIPE_DEGRADED 7 #define STRIPE_BIT_DELAY 8 +#define STRIPE_EXPANDING 9 /* * Plugging: @@ -211,6 +213,10 @@ struct raid5_private_data { int raid_disks, working_disks, failed_disks; int max_nr_stripes; + /* used during an expand */ + sector_t expand_progress; /* MaxSector when no expand happening */ + int previous_raid_disks; + struct list_head handle_list; /* stripes needing handling */ struct list_head delayed_list; /* stripes that have plugged requests */ struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ -- cgit v1.2.3 From ccfcc3c10b2a5cb8fd3c918199a4ff904fc6fb3e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:09 -0800 Subject: [PATCH] md: Core of raid5 resize process This patch provides the core of the resize/expand process. sync_request notices if a 'reshape' is happening and acts accordingly. It allocated new stripe_heads for the next chunk-wide-stripe in the target geometry, marking them STRIPE_EXPANDING. Then it finds which stripe heads in the old geometry can provide data needed by these and marks them STRIPE_EXPAND_SOURCE. This causes stripe_handle to read all blocks on those stripes. Once all blocks on a STRIPE_EXPAND_SOURCE stripe_head are read, any that are needed are copied into the corresponding STRIPE_EXPANDING stripe_head. Once a STRIPE_EXPANDING stripe_head is full, it is marks STRIPE_EXPAND_READY and then is written out and released. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 4 ++++ include/linux/raid/raid5.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 617b9506c76..4e26ef2cacc 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -157,6 +157,9 @@ struct mddev_s * DONE: thread is done and is waiting to be reaped * REQUEST: user-space has requested a sync (used with SYNC) * CHECK: user-space request for for check-only, no repair + * RESHAPE: A reshape is happening + * + * If neither SYNC or RESHAPE are set, then it is a recovery. */ #define MD_RECOVERY_RUNNING 0 #define MD_RECOVERY_SYNC 1 @@ -166,6 +169,7 @@ struct mddev_s #define MD_RECOVERY_NEEDED 5 #define MD_RECOVERY_REQUESTED 6 #define MD_RECOVERY_CHECK 7 +#define MD_RECOVERY_RESHAPE 8 unsigned long recovery; int in_sync; /* know to not need resync */ diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 6fa274aea2a..55c738d5050 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -157,6 +157,7 @@ struct stripe_head { #define R5_ReadError 8 /* seen a read error here recently */ #define R5_ReWrite 9 /* have tried to over-write the readerror */ +#define R5_Expanded 10 /* This block now has post-expand data */ /* * Write method */ @@ -176,7 +177,8 @@ struct stripe_head { #define STRIPE_DEGRADED 7 #define STRIPE_BIT_DELAY 8 #define STRIPE_EXPANDING 9 - +#define STRIPE_EXPAND_SOURCE 10 +#define STRIPE_EXPAND_READY 11 /* * Plugging: * -- cgit v1.2.3 From 292695531ae4019bb15deedc121b218d1908b648 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:10 -0800 Subject: [PATCH] md: Final stages of raid5 expand code This patch adds raid5_reshape and end_reshape which will start and finish the reshape processes. raid5_reshape is only enabled in CONFIG_MD_RAID5_RESHAPE is set, to discourage accidental use. Read the 'help' for the CONFIG_MD_RAID5_RESHAPE entry. and Make sure that you have backups, just in case. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index b6e0bcad84e..9c77cde5a79 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -92,7 +92,8 @@ extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, extern void md_super_wait(mddev_t *mddev); extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, struct page *page, int rw); - +extern void md_do_sync(mddev_t *mddev); +extern void md_new_event(mddev_t *mddev); #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } -- cgit v1.2.3 From f67055780caac6a99f43834795c43acf99eba6a6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:11 -0800 Subject: [PATCH] md: Checkpoint and allow restart of raid5 reshape We allow the superblock to record an 'old' and a 'new' geometry, and a position where any conversion is up to. The geometry allows for changing chunksize, layout and level as well as number of devices. When using verion-0.90 superblock, we convert the version to 0.91 while the conversion is happening so that an old kernel will refuse the assemble the array. For version-1, we use a feature bit for the same effect. When starting an array we check for an incomplete reshape and restart the reshape process if needed. If the reshape stopped at an awkward time (like when updating the first stripe) we refuse to assemble the array, and let user-space worry about it. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md.h | 2 ++ include/linux/raid/md_k.h | 8 ++++++++ include/linux/raid/md_p.h | 32 +++++++++++++++++++++++++++++--- include/linux/raid/raid5.h | 1 + 4 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 9c77cde5a79..66b44e5e0d6 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -95,6 +95,8 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, extern void md_do_sync(mddev_t *mddev); extern void md_new_event(mddev_t *mddev); +extern void md_update_sb(mddev_t * mddev); + #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } #endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 4e26ef2cacc..1a6f9f2f628 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -132,6 +132,14 @@ struct mddev_s char uuid[16]; + /* If the array is being reshaped, we need to record the + * new shape and an indication of where we are up to. + * This is written to the superblock. + * If reshape_position is MaxSector, then no reshape is happening (yet). + */ + sector_t reshape_position; + int delta_disks, new_level, new_layout, new_chunk; + struct mdk_thread_s *thread; /* management thread */ struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ sector_t curr_resync; /* blocks scheduled */ diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index c100fa5d4bf..774e1acfb8c 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -102,6 +102,18 @@ typedef struct mdp_device_descriptor_s { #define MD_SB_ERRORS 1 #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ + +/* + * Notes: + * - if an array is being reshaped (restriped) in order to change the + * the number of active devices in the array, 'raid_disks' will be + * the larger of the old and new numbers. 'delta_disks' will + * be the "new - old". So if +ve, raid_disks is the new value, and + * "raid_disks-delta_disks" is the old. If -ve, raid_disks is the + * old value and "raid_disks+delta_disks" is the new (smaller) value. + */ + + typedef struct mdp_superblock_s { /* * Constant generic information @@ -146,7 +158,13 @@ typedef struct mdp_superblock_s { __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ #endif __u32 recovery_cp; /* 11 recovery checkpoint sector count */ - __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 12]; + /* There are only valid for minor_version > 90 */ + __u64 reshape_position; /* 12,13 next address in array-space for reshape */ + __u32 new_level; /* 14 new level we are reshaping to */ + __u32 delta_disks; /* 15 change in number of raid_disks */ + __u32 new_layout; /* 16 new layout */ + __u32 new_chunk; /* 17 new chunk size (bytes) */ + __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18]; /* * Personality information @@ -207,7 +225,14 @@ struct mdp_superblock_1 { * NOTE: signed, so bitmap can be before superblock * only meaningful of feature_map[0] is set. */ - __u8 pad1[128-100]; /* set to 0 when written */ + + /* These are only valid with feature bit '4' */ + __u64 reshape_position; /* next address in array-space for reshape */ + __u32 new_level; /* new level we are reshaping to */ + __u32 delta_disks; /* change in number of raid_disks */ + __u32 new_layout; /* new layout */ + __u32 new_chunk; /* new chunk size (bytes) */ + __u8 pad1[128-124]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ __u64 data_offset; /* sector start of data, often 0 */ @@ -240,8 +265,9 @@ struct mdp_superblock_1 { /* feature_map bits */ #define MD_FEATURE_BITMAP_OFFSET 1 +#define MD_FEATURE_RESHAPE_ACTIVE 4 -#define MD_FEATURE_ALL 1 +#define MD_FEATURE_ALL 5 #endif diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 55c738d5050..abcdf0d0658 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -224,6 +224,7 @@ struct raid5_private_data { struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ atomic_t preread_active_stripes; /* stripes with scheduled io */ + atomic_t reshape_stripes; /* stripes with pending writes for reshape */ /* unfortunately we need two cache names as we temporarily have * two caches. */ -- cgit v1.2.3 From b578d55fdd80140f657130abd85aebeb345755fb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:12 -0800 Subject: [PATCH] md: Only checkpoint expansion progress occasionally Instead of checkpointing at each stripe, only checkpoint when a new write would overwrite uncheckpointed data. Block any write to the uncheckpointed area. Arbitrarily checkpoint at least every 3Meg. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/raid5.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index abcdf0d0658..914af667044 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -217,6 +217,9 @@ struct raid5_private_data { /* used during an expand */ sector_t expand_progress; /* MaxSector when no expand happening */ + sector_t expand_lo; /* from here up to expand_progress it out-of-bounds + * as we haven't flushed the metadata yet + */ int previous_raid_disks; struct list_head handle_list; /* stripes needing handling */ -- cgit v1.2.3 From 63c70c4f3a30e77e6f445bd16eff7934a031ebd3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:13 -0800 Subject: [PATCH] md: Split reshape handler in check_reshape and start_reshape check_reshape checks validity and does things that can be done instantly - like adding devices to raid1. start_reshape initiates a restriping process to convert the whole array. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 1a6f9f2f628..002ee631fab 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -261,7 +261,8 @@ struct mdk_personality int (*spare_active) (mddev_t *mddev); sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster); int (*resize) (mddev_t *mddev, sector_t sectors); - int (*reshape) (mddev_t *mddev, int raid_disks); + int (*check_reshape) (mddev_t *mddev); + int (*start_reshape) (mddev_t *mddev); int (*reconfig) (mddev_t *mddev, int layout, int chunk_size); /* quiesce moves between quiescence states * 0 - fully active -- cgit v1.2.3 From e464eafdb4400c6d6576ba3840d8bd40340f8a96 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:14 -0800 Subject: [PATCH] md: Support suspending of IO to regions of an md array This allows user-space to access data safely. This is needed for raid5 reshape as user-space needs to take a backup of the first few stripes before allowing reshape to commence. It will also be useful in cluster-aware raid1 configurations so that all cluster members can leave a section of the array untouched while a resync/recovery happens. A 'start' and 'end' of the suspended range are written to 2 sysfs attributes. Note that only one range can be suspended at a time. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 002ee631fab..c0d3097846a 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -151,6 +151,10 @@ struct mddev_s sector_t resync_mismatches; /* count of sectors where * parity/replica mismatch found */ + + /* allow user-space to request suspension of IO to regions of the array */ + sector_t suspend_lo; + sector_t suspend_hi; /* if zero, use the system-wide default */ int sync_speed_min; int sync_speed_max; -- cgit v1.2.3 From df5b89b323b922f56650b4b4d7c41899b937cf19 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 27 Mar 2006 01:18:20 -0800 Subject: [PATCH] md: Convert reconfig_sem to reconfig_mutex ... being careful that mutex_trylock is inverted wrt down_trylock Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index c0d3097846a..e2df61f5b09 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -185,7 +185,7 @@ struct mddev_s unsigned long recovery; int in_sync; /* know to not need resync */ - struct semaphore reconfig_sem; + struct mutex reconfig_mutex; atomic_t active; int changed; /* true if we might need to reread partition info */ -- cgit v1.2.3 From e2d74ac0664c89757bde8fb18c98cd7bf53da61c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Mar 2006 08:59:01 +0200 Subject: [PATCH] [BLOCK] cfq-iosched: change cfq io context linking from list to tree On setups with many disks, we spend a considerable amount of time looking up the process-disk mapping on each queue of io. Testing with a NULL based block driver, this costs 40-50% reduction in throughput for 1000 disks. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c179966f1a2..ed0ffa67356 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -55,13 +55,11 @@ struct as_io_context { struct cfq_queue; struct cfq_io_context { - /* - * circular list of cfq_io_contexts belonging to a process io context - */ - struct list_head list; - struct cfq_queue *cfqq[2]; + struct rb_node rb_node; void *key; + struct cfq_queue *cfqq[2]; + struct io_context *ioc; unsigned long last_end_request; @@ -72,8 +70,8 @@ struct cfq_io_context { struct list_head queue_list; - void (*dtor)(struct cfq_io_context *); - void (*exit)(struct cfq_io_context *); + void (*dtor)(struct io_context *); /* destructor */ + void (*exit)(struct io_context *); /* called on task exit */ }; /* @@ -94,7 +92,7 @@ struct io_context { int nr_batch_requests; /* Number of requests left in the batch */ struct as_io_context *aic; - struct cfq_io_context *cic; + struct rb_root cic_root; }; void put_io_context(struct io_context *ioc); -- cgit v1.2.3 From 206dc69b31ca05baac68c75b8ed2ba7dd857d273 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Mar 2006 13:03:44 +0200 Subject: [BLOCK] cfq-iosched: seek and async performance fixes Detect whether a given process is seeky and if so disable (mostly) the idle window if it is. We still allow just a little idle time, just enough to allow that process to submit a new request. That is needed to maintain fairness across priority groups. In some cases, we could setup several async queues. This is not optimal from a performance POV, since we want all async io in one queue to perform good sorting on it. It also impacted sync queues, as async io got too much slice time. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ed0ffa67356..d0cac8b58de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -63,11 +63,17 @@ struct cfq_io_context { struct io_context *ioc; unsigned long last_end_request; - unsigned long last_queue; + sector_t last_request_pos; + unsigned long last_queue; + unsigned long ttime_total; unsigned long ttime_samples; unsigned long ttime_mean; + unsigned int seek_samples; + u64 seek_total; + sector_t seek_mean; + struct list_head queue_list; void (*dtor)(struct io_context *); /* destructor */ -- cgit v1.2.3 From 0080b7aae88c75e2a6b38dfcb228b0f239e18e3c Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Tue, 28 Mar 2006 01:56:15 -0800 Subject: [PATCH] synclink_gt add gpio feature Add driver support for general purpose I/O feature of the Synclink GT adapters. Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/synclink.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 1b7cd8d1a71..2993302f792 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -1,7 +1,7 @@ /* * SyncLink Multiprotocol Serial Adapter Driver * - * $Id: synclink.h,v 3.10 2005/11/08 19:50:54 paulkf Exp $ + * $Id: synclink.h,v 3.11 2006/02/06 21:20:29 paulkf Exp $ * * Copyright (C) 1998-2000 by Microgate Corporation * @@ -221,6 +221,12 @@ struct mgsl_icount { __u32 rxidle; }; +struct gpio_desc { + __u32 state; + __u32 smask; + __u32 dir; + __u32 dmask; +}; #define DEBUG_LEVEL_DATA 1 #define DEBUG_LEVEL_ERROR 2 @@ -276,5 +282,8 @@ struct mgsl_icount { #define MGSL_IOCLOOPTXDONE _IO(MGSL_MAGIC_IOC,9) #define MGSL_IOCSIF _IO(MGSL_MAGIC_IOC,10) #define MGSL_IOCGIF _IO(MGSL_MAGIC_IOC,11) +#define MGSL_IOCSGPIO _IOW(MGSL_MAGIC_IOC,16,struct gpio_desc) +#define MGSL_IOCGGPIO _IOR(MGSL_MAGIC_IOC,17,struct gpio_desc) +#define MGSL_IOCWAITGPIO _IOWR(MGSL_MAGIC_IOC,18,struct gpio_desc) #endif /* _SYNCLINK_H_ */ -- cgit v1.2.3 From 273577165cd206d2d6689ee4b18aa13de1ec4bde Mon Sep 17 00:00:00 2001 From: Brian Rogan Date: Tue, 28 Mar 2006 01:56:20 -0800 Subject: [PATCH] Add oprofile_add_ext_sample On ppc64 we look at a profiling register to work out the sample address and if it was in userspace or kernel. The backtrace interface oprofile_add_sample does not allow this. Create oprofile_add_ext_sample and make oprofile_add_sample use it too. Signed-off-by: Anton Blanchard Cc: Philippe Elie Cc: John Levon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oprofile.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 559c4c38a9c..b5b3197dfd4 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -61,6 +61,16 @@ void oprofile_arch_exit(void); */ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event); +/** + * Add an extended sample. Use this when the PC is not from the regs, and + * we cannot determine if we're in kernel mode from the regs. + * + * This function does perform a backtrace. + * + */ +void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel); + /* Use this instead when the PC value is not from the regs. Doesn't * backtrace. */ void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event); -- cgit v1.2.3 From a28af471b8946de052a0eb0c080d5457be93f168 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 28 Mar 2006 01:56:26 -0800 Subject: [PATCH] fs/fat/: proper prototypes for two functions Add proper prototypes for fat_cache_init() and fat_cache_destroy() in msdos_fs.h. Signed-off-by: Adrian Bunk Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 779e6a5744c..53cee158165 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -420,6 +420,9 @@ extern int date_dos2unix(unsigned short time, unsigned short date); extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date); extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); +int fat_cache_init(void); +void fat_cache_destroy(void); + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From e51236092d2f7e40e87e88804b5b42e5f8025415 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 28 Mar 2006 01:56:27 -0800 Subject: [PATCH] remove relayfs_fs.h This is obsolete. Cc: Tom Zanussi Cc: Jens Axboe Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/relayfs_fs.h | 287 --------------------------------------------- 1 file changed, 287 deletions(-) delete mode 100644 include/linux/relayfs_fs.h (limited to 'include/linux') diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h deleted file mode 100644 index 7342e66247f..00000000000 --- a/include/linux/relayfs_fs.h +++ /dev/null @@ -1,287 +0,0 @@ -/* - * linux/include/linux/relayfs_fs.h - * - * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp - * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) - * - * RelayFS definitions and declarations - */ - -#ifndef _LINUX_RELAYFS_FS_H -#define _LINUX_RELAYFS_FS_H - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Tracks changes to rchan/rchan_buf structs - */ -#define RELAYFS_CHANNEL_VERSION 6 - -/* - * Per-cpu relay channel buffer - */ -struct rchan_buf -{ - void *start; /* start of channel buffer */ - void *data; /* start of current sub-buffer */ - size_t offset; /* current offset into sub-buffer */ - size_t subbufs_produced; /* count of sub-buffers produced */ - size_t subbufs_consumed; /* count of sub-buffers consumed */ - struct rchan *chan; /* associated channel */ - wait_queue_head_t read_wait; /* reader wait queue */ - struct work_struct wake_readers; /* reader wake-up work struct */ - struct dentry *dentry; /* channel file dentry */ - struct kref kref; /* channel buffer refcount */ - struct page **page_array; /* array of current buffer pages */ - unsigned int page_count; /* number of current buffer pages */ - unsigned int finalized; /* buffer has been finalized */ - size_t *padding; /* padding counts per sub-buffer */ - size_t prev_padding; /* temporary variable */ - size_t bytes_consumed; /* bytes consumed in cur read subbuf */ - unsigned int cpu; /* this buf's cpu */ -} ____cacheline_aligned; - -/* - * Relay channel data structure - */ -struct rchan -{ - u32 version; /* the version of this struct */ - size_t subbuf_size; /* sub-buffer size */ - size_t n_subbufs; /* number of sub-buffers per buffer */ - size_t alloc_size; /* total buffer size allocated */ - struct rchan_callbacks *cb; /* client callbacks */ - struct kref kref; /* channel refcount */ - void *private_data; /* for user-defined data */ - size_t last_toobig; /* tried to log event > subbuf size */ - struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ -}; - -/* - * Relay channel client callbacks - */ -struct rchan_callbacks -{ - /* - * subbuf_start - called on buffer-switch to a new sub-buffer - * @buf: the channel buffer containing the new sub-buffer - * @subbuf: the start of the new sub-buffer - * @prev_subbuf: the start of the previous sub-buffer - * @prev_padding: unused space at the end of previous sub-buffer - * - * The client should return 1 to continue logging, 0 to stop - * logging. - * - * NOTE: subbuf_start will also be invoked when the buffer is - * created, so that the first sub-buffer can be initialized - * if necessary. In this case, prev_subbuf will be NULL. - * - * NOTE: the client can reserve bytes at the beginning of the new - * sub-buffer by calling subbuf_start_reserve() in this callback. - */ - int (*subbuf_start) (struct rchan_buf *buf, - void *subbuf, - void *prev_subbuf, - size_t prev_padding); - - /* - * buf_mapped - relayfs buffer mmap notification - * @buf: the channel buffer - * @filp: relayfs file pointer - * - * Called when a relayfs file is successfully mmapped - */ - void (*buf_mapped)(struct rchan_buf *buf, - struct file *filp); - - /* - * buf_unmapped - relayfs buffer unmap notification - * @buf: the channel buffer - * @filp: relayfs file pointer - * - * Called when a relayfs file is successfully unmapped - */ - void (*buf_unmapped)(struct rchan_buf *buf, - struct file *filp); - /* - * create_buf_file - create file to represent a relayfs channel buffer - * @filename: the name of the file to create - * @parent: the parent of the file to create - * @mode: the mode of the file to create - * @buf: the channel buffer - * @is_global: outparam - set non-zero if the buffer should be global - * - * Called during relay_open(), once for each per-cpu buffer, - * to allow the client to create a file to be used to - * represent the corresponding channel buffer. If the file is - * created outside of relayfs, the parent must also exist in - * that filesystem. - * - * The callback should return the dentry of the file created - * to represent the relay buffer. - * - * Setting the is_global outparam to a non-zero value will - * cause relay_open() to create a single global buffer rather - * than the default set of per-cpu buffers. - * - * See Documentation/filesystems/relayfs.txt for more info. - */ - struct dentry *(*create_buf_file)(const char *filename, - struct dentry *parent, - int mode, - struct rchan_buf *buf, - int *is_global); - - /* - * remove_buf_file - remove file representing a relayfs channel buffer - * @dentry: the dentry of the file to remove - * - * Called during relay_close(), once for each per-cpu buffer, - * to allow the client to remove a file used to represent a - * channel buffer. - * - * The callback should return 0 if successful, negative if not. - */ - int (*remove_buf_file)(struct dentry *dentry); -}; - -/* - * relayfs kernel API, fs/relayfs/relay.c - */ - -struct rchan *relay_open(const char *base_filename, - struct dentry *parent, - size_t subbuf_size, - size_t n_subbufs, - struct rchan_callbacks *cb); -extern void relay_close(struct rchan *chan); -extern void relay_flush(struct rchan *chan); -extern void relay_subbufs_consumed(struct rchan *chan, - unsigned int cpu, - size_t consumed); -extern void relay_reset(struct rchan *chan); -extern int relay_buf_full(struct rchan_buf *buf); - -extern size_t relay_switch_subbuf(struct rchan_buf *buf, - size_t length); -extern struct dentry *relayfs_create_dir(const char *name, - struct dentry *parent); -extern int relayfs_remove_dir(struct dentry *dentry); -extern struct dentry *relayfs_create_file(const char *name, - struct dentry *parent, - int mode, - struct file_operations *fops, - void *data); -extern int relayfs_remove_file(struct dentry *dentry); - -/** - * relay_write - write data into the channel - * @chan: relay channel - * @data: data to be written - * @length: number of bytes to write - * - * Writes data into the current cpu's channel buffer. - * - * Protects the buffer by disabling interrupts. Use this - * if you might be logging from interrupt context. Try - * __relay_write() if you know you won't be logging from - * interrupt context. - */ -static inline void relay_write(struct rchan *chan, - const void *data, - size_t length) -{ - unsigned long flags; - struct rchan_buf *buf; - - local_irq_save(flags); - buf = chan->buf[smp_processor_id()]; - if (unlikely(buf->offset + length > chan->subbuf_size)) - length = relay_switch_subbuf(buf, length); - memcpy(buf->data + buf->offset, data, length); - buf->offset += length; - local_irq_restore(flags); -} - -/** - * __relay_write - write data into the channel - * @chan: relay channel - * @data: data to be written - * @length: number of bytes to write - * - * Writes data into the current cpu's channel buffer. - * - * Protects the buffer by disabling preemption. Use - * relay_write() if you might be logging from interrupt - * context. - */ -static inline void __relay_write(struct rchan *chan, - const void *data, - size_t length) -{ - struct rchan_buf *buf; - - buf = chan->buf[get_cpu()]; - if (unlikely(buf->offset + length > buf->chan->subbuf_size)) - length = relay_switch_subbuf(buf, length); - memcpy(buf->data + buf->offset, data, length); - buf->offset += length; - put_cpu(); -} - -/** - * relay_reserve - reserve slot in channel buffer - * @chan: relay channel - * @length: number of bytes to reserve - * - * Returns pointer to reserved slot, NULL if full. - * - * Reserves a slot in the current cpu's channel buffer. - * Does not protect the buffer at all - caller must provide - * appropriate synchronization. - */ -static inline void *relay_reserve(struct rchan *chan, size_t length) -{ - void *reserved; - struct rchan_buf *buf = chan->buf[smp_processor_id()]; - - if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { - length = relay_switch_subbuf(buf, length); - if (!length) - return NULL; - } - reserved = buf->data + buf->offset; - buf->offset += length; - - return reserved; -} - -/** - * subbuf_start_reserve - reserve bytes at the start of a sub-buffer - * @buf: relay channel buffer - * @length: number of bytes to reserve - * - * Helper function used to reserve bytes at the beginning of - * a sub-buffer in the subbuf_start() callback. - */ -static inline void subbuf_start_reserve(struct rchan_buf *buf, - size_t length) -{ - BUG_ON(length >= buf->chan->subbuf_size - 1); - buf->offset = length; -} - -/* - * exported relay file operations, fs/relayfs/inode.c - */ -extern struct file_operations relay_file_operations; - -#endif /* _LINUX_RELAYFS_FS_H */ - -- cgit v1.2.3 From d266ab88938e49aa95f1965ee020df1b1d4c5761 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 28 Mar 2006 01:56:31 -0800 Subject: [PATCH] Small fixes backported to old IDE SiS driver Some quick backport bits from the libata PATA work to fix things found in the sis driver. The piix driver needs some fixes too but those are way to large and need someone working on old IDE with time to do them. This patch fixes the case where random bits get loaded into SIS timing registers according to the description of the correct behaviour from Vojtech Pavlik. It also adds the SiS5517 ATA16 chipset which is not currently supported by the driver. Thanks to Conrad Harriss for loaning me the machine with the 5517 chipset. Signed-off-by: Alan Cox Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 02f6cf20b14..e2ab2ac18d6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -642,6 +642,7 @@ #define PCI_DEVICE_ID_SI_965 0x0965 #define PCI_DEVICE_ID_SI_5511 0x5511 #define PCI_DEVICE_ID_SI_5513 0x5513 +#define PCI_DEVICE_ID_SI_5517 0x5517 #define PCI_DEVICE_ID_SI_5518 0x5518 #define PCI_DEVICE_ID_SI_5571 0x5571 #define PCI_DEVICE_ID_SI_5581 0x5581 -- cgit v1.2.3 From 70674f95c0a2ea694d5c39f4e514f538a09be36f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 28 Mar 2006 01:56:33 -0800 Subject: [PATCH] Optimize select/poll by putting small data sets on the stack Optimize select and poll by a using stack space for small fd sets This brings back an old optimization from Linux 2.0. Using the stack is faster than kmalloc. On a Intel P4 system it speeds up a select of a single pty fd by about 13% (~4000 cycles -> ~3500) It also saves memory because a daemon hanging in select or poll will usually save one or two less pages. This can add up - e.g. if you have 10 daemons blocking in poll/select you save 40KB of memory. I did a patch for this long ago, but it was never applied. This version is a reimplementation of the old patch that tries to be less intrusive. I only did the minimal changes needed for the stack allocation. The cut off point before external memory is allocated is currently at 832bytes. The system calls always allocate this much memory on the stack. These 832 bytes are divided into 256 bytes frontend data (for the select bitmaps of the pollfds) and the rest of the space for the wait queues used by the low level drivers. There are some extreme cases where this won't work out for select and it falls back to allocating memory too early - especially with very sparse large select bitmaps - but the majority of processes who only have a small number of file descriptors should be ok. [TBD: 832/256 might not be the best split for select or poll] I suspect more optimizations might be possible, but they would be more complicated. One way would be to cache the select/poll context over multiple system calls because typically the input values should be similar. Problem is when to flush the file descriptors out though. Signed-off-by: Andi Kleen Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index 8e8f6098508..51e1b56741f 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -11,6 +11,15 @@ #include #include +/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating + additional memory. */ +#define MAX_STACK_ALLOC 832 +#define FRONTEND_STACK_ALLOC 256 +#define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC +#define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC +#define WQUEUES_STACK_ALLOC (MAX_STACK_ALLOC - FRONTEND_STACK_ALLOC) +#define N_INLINE_POLL_ENTRIES (WQUEUES_STACK_ALLOC / sizeof(struct poll_table_entry)) + struct poll_table_struct; /* @@ -33,6 +42,12 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->qproc = qproc; } +struct poll_table_entry { + struct file * filp; + wait_queue_t wait; + wait_queue_head_t * wait_address; +}; + /* * Structures and helpers for sys_poll/sys_poll */ @@ -40,6 +55,8 @@ struct poll_wqueues { poll_table pt; struct poll_table_page * table; int error; + int inline_index; + struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES]; }; extern void poll_initwait(struct poll_wqueues *pwq); -- cgit v1.2.3 From 631d6747e1d877a4baa924cb373b8b9511a53e5e Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 28 Mar 2006 01:56:36 -0800 Subject: [PATCH] for_each_possible_cpu: defines for_each_possible_cpu for_each_cpu() is a for-loop over cpu_possible_map. for_each_online_cpu is for-loop cpu over cpu_online_map. .....for_each_cpu() is not sufficiently explicit and can lead to mistakes. This patch adds for_each_possible_cpu() in preparation for the removal of for_each_cpu(). Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 99e6115d8e5..9cbb781d6f8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -67,7 +67,7 @@ * * int any_online_cpu(mask) First online cpu in mask * - * for_each_cpu(cpu) for-loop cpu over cpu_possible_map + * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map * @@ -405,7 +405,8 @@ int __any_online_cpu(const cpumask_t *mask); #define any_online_cpu(mask) 0 #endif -#define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) +#define for_each_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) +#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) -- cgit v1.2.3 From 0a945022778f100115d0cb6234eb28fc1b15ccaf Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 28 Mar 2006 01:56:37 -0800 Subject: [PATCH] for_each_possible_cpu: fixes for generic part replaces for_each_cpu with for_each_possible_cpu(). Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genhd.h | 4 ++-- include/linux/kernel_stat.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3c1b0294a74..10a27f29d69 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -152,14 +152,14 @@ struct disk_attribute { ({ \ typeof(gendiskp->dkstats->field) res = 0; \ int i; \ - for_each_cpu(i) \ + for_each_possible_cpu(i) \ res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ res; \ }) static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { int i; - for_each_cpu(i) + for_each_possible_cpu(i) memset(per_cpu_ptr(gendiskp->dkstats, i), value, sizeof (struct disk_stats)); } diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index a484572c302..b46249082cc 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -46,7 +46,7 @@ static inline int kstat_irqs(int irq) { int cpu, sum = 0; - for_each_cpu(cpu) + for_each_possible_cpu(cpu) sum += kstat_cpu(cpu).irqs[irq]; return sum; -- cgit v1.2.3 From 99ac48f54a91d02140c497edc31dc57d4bc5c85d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 28 Mar 2006 01:56:41 -0800 Subject: [PATCH] mark f_ops const in the inode Mark the f_ops members of inodes as const, as well as fix the ripple-through this causes by places that copy this f_ops and then "do stuff" with it. Signed-off-by: Arjan van de Ven Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cdev.h | 4 ++-- include/linux/debugfs.h | 2 +- include/linux/fs.h | 6 +++--- include/linux/input.h | 2 +- include/linux/miscdevice.h | 2 +- include/linux/oprofile.h | 4 ++-- include/linux/proc_fs.h | 4 ++-- include/linux/sound.h | 12 ++++++------ include/linux/sunrpc/stats.h | 4 ++-- include/linux/usb.h | 2 +- include/linux/videodev2.h | 2 +- 11 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cdev.h b/include/linux/cdev.h index 8da37e29cb8..2216638962d 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -5,13 +5,13 @@ struct cdev { struct kobject kobj; struct module *owner; - struct file_operations *ops; + const struct file_operations *ops; struct list_head list; dev_t dev; unsigned int count; }; -void cdev_init(struct cdev *, struct file_operations *); +void cdev_init(struct cdev *, const struct file_operations *); struct cdev *cdev_alloc(void); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4b0428e335b..176e2d37157 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -29,7 +29,7 @@ struct debugfs_blob_wrapper { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, - struct file_operations *fops); + const struct file_operations *fops); struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); diff --git a/include/linux/fs.h b/include/linux/fs.h index 680d913350e..ef355bc7371 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -496,7 +496,7 @@ struct inode { struct mutex i_mutex; struct rw_semaphore i_alloc_sem; struct inode_operations *i_op; - struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ struct super_block *i_sb; struct file_lock *i_flock; struct address_space *i_mapping; @@ -636,7 +636,7 @@ struct file { } f_u; struct dentry *f_dentry; struct vfsmount *f_vfsmnt; - struct file_operations *f_op; + const struct file_operations *f_op; atomic_t f_count; unsigned int f_flags; mode_t f_mode; @@ -1414,7 +1414,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int register_chrdev(unsigned int, const char *, - struct file_operations *); + const struct file_operations *); extern int unregister_chrdev(unsigned int, const char *); extern void unregister_chrdev_region(dev_t, unsigned); extern int chrdev_open(struct inode *, struct file *); diff --git a/include/linux/input.h b/include/linux/input.h index 6d4cc3c110d..1d4e341b72e 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -957,7 +957,7 @@ struct input_handler { struct input_handle* (*connect)(struct input_handler *handler, struct input_dev *dev, struct input_device_id *id); void (*disconnect)(struct input_handle *handle); - struct file_operations *fops; + const struct file_operations *fops; int minor; char *name; diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 14ceebfc1ef..5b584dafb5a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -36,7 +36,7 @@ struct class_device; struct miscdevice { int minor; const char *name; - struct file_operations *fops; + const struct file_operations *fops; struct list_head list; struct device *dev; struct class_device *class; diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index b5b3197dfd4..0d514b25245 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -84,10 +84,10 @@ void oprofile_add_trace(unsigned long eip); * the specified file operations. */ int oprofilefs_create_file(struct super_block * sb, struct dentry * root, - char const * name, struct file_operations * fops); + char const * name, const struct file_operations * fops); int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root, - char const * name, struct file_operations * fops, int perm); + char const * name, const struct file_operations * fops, int perm); /** Create a file for read/write access to an unsigned long. */ int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root, diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index cb224cf653b..6d03d025fcd 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -58,7 +58,7 @@ struct proc_dir_entry { gid_t gid; loff_t size; struct inode_operations * proc_iops; - struct file_operations * proc_fops; + const struct file_operations * proc_fops; get_info_t *get_info; struct module *owner; struct proc_dir_entry *next, *parent, *subdir; @@ -189,7 +189,7 @@ static inline struct proc_dir_entry *proc_net_create(const char *name, } static inline struct proc_dir_entry *proc_net_fops_create(const char *name, - mode_t mode, struct file_operations *fops) + mode_t mode, const struct file_operations *fops) { struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net); if (res) diff --git a/include/linux/sound.h b/include/linux/sound.h index 72b9af4c3fd..f63d8342ffa 100644 --- a/include/linux/sound.h +++ b/include/linux/sound.h @@ -30,12 +30,12 @@ */ struct device; -extern int register_sound_special(struct file_operations *fops, int unit); -extern int register_sound_special_device(struct file_operations *fops, int unit, struct device *dev); -extern int register_sound_mixer(struct file_operations *fops, int dev); -extern int register_sound_midi(struct file_operations *fops, int dev); -extern int register_sound_dsp(struct file_operations *fops, int dev); -extern int register_sound_synth(struct file_operations *fops, int dev); +extern int register_sound_special(const struct file_operations *fops, int unit); +extern int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev); +extern int register_sound_mixer(const struct file_operations *fops, int dev); +extern int register_sound_midi(const struct file_operations *fops, int dev); +extern int register_sound_dsp(const struct file_operations *fops, int dev); +extern int register_sound_synth(const struct file_operations *fops, int dev); extern void unregister_sound_special(int unit); extern void unregister_sound_mixer(int unit); diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index 0d6ed3c8bdc..d93c24b47f3 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -50,7 +50,7 @@ struct proc_dir_entry * rpc_proc_register(struct rpc_stat *); void rpc_proc_unregister(const char *); void rpc_proc_zero(struct rpc_program *); struct proc_dir_entry * svc_proc_register(struct svc_stat *, - struct file_operations *); + const struct file_operations *); void svc_proc_unregister(const char *); void svc_seq_show(struct seq_file *, @@ -65,7 +65,7 @@ static inline void rpc_proc_unregister(const char *p) {} static inline void rpc_proc_zero(struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, - struct file_operations *f) { return NULL; } + const struct file_operations *f) { return NULL; } static inline void svc_proc_unregister(const char *p) {} static inline void svc_seq_show(struct seq_file *seq, diff --git a/include/linux/usb.h b/include/linux/usb.h index 130d125fda1..e34e5e3dce5 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -615,7 +615,7 @@ extern struct bus_type usb_bus_type; */ struct usb_class_driver { char *name; - struct file_operations *fops; + const struct file_operations *fops; int minor_base; }; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 2275bfec5b6..af2d6155d3f 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -75,7 +75,7 @@ struct video_device int minor; /* device ops + callbacks */ - struct file_operations *fops; + const struct file_operations *fops; void (*release)(struct video_device *vfd); -- cgit v1.2.3 From 4b6f5d20b04dcbc3d888555522b90ba6d36c4106 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 28 Mar 2006 01:56:42 -0800 Subject: [PATCH] Make most file operations structs in fs/ const This is a conversion to make the various file_operations structs in fs/ const. Basically a regexp job, with a few manual fixups The goal is both to increase correctness (harder to accidentally write to shared datastructures) and reducing the false sharing of cachelines with things that get dirty in .data (while .rodata is nicely read only and thus cache clean) Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_linux.h | 6 +++--- include/linux/crash_dump.h | 2 +- include/linux/efs_fs.h | 2 +- include/linux/ext3_fs.h | 4 ++-- include/linux/fs.h | 20 ++++++++++---------- include/linux/hugetlb.h | 2 +- include/linux/msdos_fs.h | 4 ++-- include/linux/ncp_fs.h | 4 ++-- include/linux/nfs_fs.h | 4 ++-- include/linux/proc_fs.h | 6 +++--- include/linux/qnx4_fs.h | 4 ++-- include/linux/ramfs.h | 2 +- include/linux/reiserfs_fs.h | 4 ++-- include/linux/ufs_fs.h | 4 ++-- 14 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index cc621ec409d..b3ecf8f71d9 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -30,9 +30,9 @@ extern struct inode_operations coda_ioctl_inode_operations; extern struct address_space_operations coda_file_aops; extern struct address_space_operations coda_symlink_aops; -extern struct file_operations coda_dir_operations; -extern struct file_operations coda_file_operations; -extern struct file_operations coda_ioctl_operations; +extern const struct file_operations coda_dir_operations; +extern const struct file_operations coda_file_operations; +extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 534d750d922..32503657f14 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -11,7 +11,7 @@ extern unsigned long long elfcorehdr_addr; extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); -extern struct file_operations proc_vmcore_operations; +extern const struct file_operations proc_vmcore_operations; extern struct proc_dir_entry *proc_vmcore; #endif /* CONFIG_CRASH_DUMP */ diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index 28f368c526f..fbfa6b52e2f 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h @@ -37,7 +37,7 @@ static inline struct efs_sb_info *SUPER_INFO(struct super_block *sb) struct statfs; extern struct inode_operations efs_dir_inode_operations; -extern struct file_operations efs_dir_operations; +extern const struct file_operations efs_dir_operations; extern struct address_space_operations efs_symlink_aops; extern void efs_read_inode(struct inode *); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 8bb4f842cde..3ade6a4e3bd 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -833,11 +833,11 @@ do { \ */ /* dir.c */ -extern struct file_operations ext3_dir_operations; +extern const struct file_operations ext3_dir_operations; /* file.c */ extern struct inode_operations ext3_file_inode_operations; -extern struct file_operations ext3_file_operations; +extern const struct file_operations ext3_file_operations; /* namei.c */ extern struct inode_operations ext3_dir_inode_operations; diff --git a/include/linux/fs.h b/include/linux/fs.h index ef355bc7371..408fe89498f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1390,11 +1390,11 @@ extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern struct block_device *open_by_devnum(dev_t, unsigned); -extern struct file_operations def_blk_fops; +extern const struct file_operations def_blk_fops; extern struct address_space_operations def_blk_aops; -extern struct file_operations def_chr_fops; -extern struct file_operations bad_sock_fops; -extern struct file_operations def_fifo_fops; +extern const struct file_operations def_chr_fops; +extern const struct file_operations bad_sock_fops; +extern const struct file_operations def_fifo_fops; extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); @@ -1444,9 +1444,9 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); extern void make_bad_inode(struct inode *); extern int is_bad_inode(struct inode *); -extern struct file_operations read_fifo_fops; -extern struct file_operations write_fifo_fops; -extern struct file_operations rdwr_fifo_fops; +extern const struct file_operations read_fifo_fops; +extern const struct file_operations write_fifo_fops; +extern const struct file_operations rdwr_fifo_fops; extern int fs_may_remount_ro(struct super_block *); @@ -1688,7 +1688,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, nr_segs, get_block, end_io, DIO_OWN_LOCKING); } -extern struct file_operations generic_ro_fops; +extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) @@ -1744,9 +1744,9 @@ extern int simple_commit_write(struct file *file, struct page *page, extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); -extern struct file_operations simple_dir_operations; +extern const struct file_operations simple_dir_operations; extern struct inode_operations simple_dir_inode_operations; -struct tree_descr { char *name; struct file_operations *ops; int mode; }; +struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, int, struct tree_descr *); extern int simple_pin_fs(char *name, struct vfsmount **mount, int *count); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d6f1019625a..4c5e610fe44 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -154,7 +154,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) return sb->s_fs_info; } -extern struct file_operations hugetlbfs_file_operations; +extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_zero_setup(size_t); int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 53cee158165..d9035c73e5d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -334,7 +334,7 @@ extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks); /* fat/dir.c */ -extern struct file_operations fat_dir_operations; +extern const struct file_operations fat_dir_operations; extern int fat_search_long(struct inode *inode, const unsigned char *name, int name_len, struct fat_slot_info *sinfo); extern int fat_dir_empty(struct inode *dir); @@ -397,7 +397,7 @@ extern int fat_count_free_clusters(struct super_block *sb); /* fat/file.c */ extern int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); -extern struct file_operations fat_file_operations; +extern const struct file_operations fat_file_operations; extern struct inode_operations fat_file_inode_operations; extern int fat_notify_change(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index e0134256853..96dc237b8f0 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -209,7 +209,7 @@ void ncp_update_inode2(struct inode *, struct ncp_entry_info *); /* linux/fs/ncpfs/dir.c */ extern struct inode_operations ncp_dir_inode_operations; -extern struct file_operations ncp_dir_operations; +extern const struct file_operations ncp_dir_operations; int ncp_conn_logged_in(struct super_block *); int ncp_date_dos2unix(__le16 time, __le16 date); void ncp_date_unix2dos(int unix_date, __le16 * time, __le16 * date); @@ -230,7 +230,7 @@ void ncp_unlock_server(struct ncp_server *server); /* linux/fs/ncpfs/file.c */ extern struct inode_operations ncp_file_inode_operations; -extern struct file_operations ncp_file_operations; +extern const struct file_operations ncp_file_operations; int ncp_make_open(struct inode *, int); /* linux/fs/ncpfs/mmap.c */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cbebd7d1b9e..c71227dd438 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -324,7 +324,7 @@ extern struct inode_operations nfs_file_inode_operations; #ifdef CONFIG_NFS_V3 extern struct inode_operations nfs3_file_inode_operations; #endif /* CONFIG_NFS_V3 */ -extern struct file_operations nfs_file_operations; +extern const struct file_operations nfs_file_operations; extern struct address_space_operations nfs_file_aops; static inline struct rpc_cred *nfs_file_cred(struct file *file) @@ -371,7 +371,7 @@ extern struct inode_operations nfs_dir_inode_operations; #ifdef CONFIG_NFS_V3 extern struct inode_operations nfs3_dir_inode_operations; #endif /* CONFIG_NFS_V3 */ -extern struct file_operations nfs_dir_operations; +extern const struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 6d03d025fcd..135871df991 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -128,9 +128,9 @@ extern int proc_match(int, const char *,struct proc_dir_entry *); extern int proc_readdir(struct file *, void *, filldir_t); extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); -extern struct file_operations proc_kcore_operations; -extern struct file_operations proc_kmsg_operations; -extern struct file_operations ppc_htab_operations; +extern const struct file_operations proc_kcore_operations; +extern const struct file_operations proc_kmsg_operations; +extern const struct file_operations ppc_htab_operations; /* * proc_tty.c diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index fc610bb0f73..27f49c85d5d 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -118,8 +118,8 @@ extern struct buffer_head *qnx4_bread(struct inode *, int, int); extern struct inode_operations qnx4_file_inode_operations; extern struct inode_operations qnx4_dir_inode_operations; -extern struct file_operations qnx4_file_operations; -extern struct file_operations qnx4_dir_operations; +extern const struct file_operations qnx4_file_operations; +extern const struct file_operations qnx4_dir_operations; extern int qnx4_is_free(struct super_block *sb, long block); extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 953b6df5d03..78ecfa28b1c 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -15,7 +15,7 @@ extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); #endif -extern struct file_operations ramfs_file_operations; +extern const struct file_operations ramfs_file_operations; extern struct vm_operations_struct generic_file_vm_ops; #endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 912f1b7cb18..5676c4210e2 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1960,7 +1960,7 @@ int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset, extern struct inode_operations reiserfs_dir_inode_operations; extern struct inode_operations reiserfs_symlink_inode_operations; extern struct inode_operations reiserfs_special_inode_operations; -extern struct file_operations reiserfs_dir_operations; +extern const struct file_operations reiserfs_dir_operations; /* tail_conversion.c */ int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, @@ -1972,7 +1972,7 @@ void reiserfs_unmap_buffer(struct buffer_head *); /* file.c */ extern struct inode_operations reiserfs_file_inode_operations; -extern struct file_operations reiserfs_file_operations; +extern const struct file_operations reiserfs_file_operations; extern struct address_space_operations reiserfs_address_space_operations; /* fix_nodes.c */ diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index b0ffe4356e5..843aeaaa79d 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -895,7 +895,7 @@ extern void ufs_set_link(struct inode *, struct ufs_dir_entry *, struct buffer_h /* file.c */ extern struct inode_operations ufs_file_inode_operations; -extern struct file_operations ufs_file_operations; +extern const struct file_operations ufs_file_operations; extern struct address_space_operations ufs_aops; @@ -915,7 +915,7 @@ extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); /* namei.c */ -extern struct file_operations ufs_dir_operations; +extern const struct file_operations ufs_dir_operations; /* super.c */ extern void ufs_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -- cgit v1.2.3 From 910638ae7ed4be27d6af55f6c9b5bf54b838e78b Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Tue, 28 Mar 2006 01:56:48 -0800 Subject: [PATCH] Replace 0xff.. with correct DMA_xBIT_MASK Replace all occurences of 0xff.. in calls to function pci_set_dma_mask() and pci_set_consistant_dma_mask() with the corresponding DMA_xBIT_MASK from linux/dma-mapping.h. Signed-off-by: Matthias Gehre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index a8731062a74..9b4751aecc2 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -21,6 +21,7 @@ enum dma_data_direction { #define DMA_30BIT_MASK 0x000000003fffffffULL #define DMA_29BIT_MASK 0x000000001fffffffULL #define DMA_28BIT_MASK 0x000000000fffffffULL +#define DMA_24BIT_MASK 0x0000000000ffffffULL #include -- cgit v1.2.3 From 7f927fcc2fd1575d01efb4b76665975007945690 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Mar 2006 01:56:53 -0800 Subject: [PATCH] Typo fixes Fix a lot of typos. Eyeballed by jmc@ in OpenBSD. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 2cb19e6503a..d03fadfcafe 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -734,7 +734,7 @@ struct fb_tile_ops { /* A driver may set this flag to indicate that it does want a set_par to be * called every time when fbcon_switch is executed. The advantage is that with - * this flag set you can really be shure that set_par is always called before + * this flag set you can really be sure that set_par is always called before * any of the functions dependant on the correct hardware state or altering * that state, even if you are using some broken X releases. The disadvantage * is that it introduces unwanted delays to every console switch if set_par -- cgit v1.2.3 From 6c99c5cb94319a601b5ec5ee31c331f84755dd74 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 28 Mar 2006 16:11:00 -0800 Subject: [PATCH] Remove dead kill_sl prototype from sched.h The kill_sl function doesn't exist in the kernel so a prototype is completely unnecessary. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 20b4f0372e4..5f5ab98bbb6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1101,7 +1101,6 @@ extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); extern void zap_other_threads(struct task_struct *p); extern int kill_pg(pid_t, int, int); -extern int kill_sl(pid_t, int, int); extern int kill_proc(pid_t, int, int); extern struct sigqueue *sigqueue_alloc(void); extern void sigqueue_free(struct sigqueue *); -- cgit v1.2.3 From d73d65293e3e2de7e916a89c8da30be0948afab7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 28 Mar 2006 16:11:03 -0800 Subject: [PATCH] pidhash: kill switch_exec_pids switch_exec_pids is only called from de_thread by way of exec, and it is only called when we are exec'ing from a non thread group leader. Currently switch_exec_pids gives the leader the pid of the thread and unhashes and rehashes all of the process groups. The leader is already in the EXIT_DEAD state so no one cares about it's pids. The only concern for the leader is that __unhash_process called from release_task will function correctly. If we don't touch the leader at all we know that __unhash_process will work fine so there is no need to touch the leader. For the task becomming the thread group leader, we just need to give it the pid of the old thread group leader, add it to the task list, and attach it to the session and the process group of the thread group. Currently de_thread is also adding the task to the task list which is just silly. Currently the only leader of __detach_pid besides detach_pid is switch_exec_pids because of the ugly extra work that was being performed. So this patch removes switch_exec_pids because it is doing too much, it is creating an unnecessary special case in pid.c, duing work duplicated in de_thread, and generally obscuring what it is going on. The necessary work is added to de_thread, and it seems to be a little clearer there what is going on. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Cc: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 5b2fcb19d2d..099e70ecf7c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -38,7 +38,6 @@ extern struct pid *FASTCALL(find_pid(enum pid_type, int)); extern int alloc_pidmap(void); extern void FASTCALL(free_pidmap(int)); -extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread); #define do_each_task_pid(who, type, task) \ if ((task = find_task_by_pid_type(type, who))) { \ -- cgit v1.2.3 From 8fafabd86f1b75ed3cc6a6ffbe6c3e53e3d8457d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:05 -0800 Subject: [PATCH] remove add_parent()'s parent argument add_parent(p, parent) is always called with parent == p->parent, and it makes no sense to do it differently. This patch removes this argument. No changes in affected .o files. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f5ab98bbb6..b4b14c32b28 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1184,7 +1184,7 @@ extern void wait_task_inactive(task_t * p); #endif #define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children) +#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) #define REMOVE_LINKS(p) do { \ if (thread_group_leader(p)) \ @@ -1195,7 +1195,7 @@ extern void wait_task_inactive(task_t * p); #define SET_LINKS(p) do { \ if (thread_group_leader(p)) \ list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p, (p)->parent); \ + add_parent(p); \ } while (0) #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) -- cgit v1.2.3 From c97d98931ac52ef110b62d9b75c6a6f2bfbc1898 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:06 -0800 Subject: [PATCH] kill SET_LINKS/REMOVE_LINKS Both SET_LINKS() and SET_LINKS/REMOVE_LINKS() have exactly one caller, and these callers already check thread_group_leader(). This patch kills theese macros, they mix two different things: setting process's parent and registering it in init_task.tasks list. Callers are updated to do these actions by hand. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4b14c32b28..1f16fb1fea2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1186,18 +1186,6 @@ extern void wait_task_inactive(task_t * p); #define remove_parent(p) list_del_init(&(p)->sibling) #define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) -#define REMOVE_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_del_init(&(p)->tasks); \ - remove_parent(p); \ - } while (0) - -#define SET_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p); \ - } while (0) - #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) #define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) -- cgit v1.2.3 From 73b9ebfe126a4a886ee46cbab637374d7024668a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:07 -0800 Subject: [PATCH] pidhash: don't count idle threads fork_idle() does unhash_process() just after copy_process(). Contrary, boot_cpu's idle thread explicitely registers itself for each pid_type with nr = 0. copy_process() already checks p->pid != 0 before process_counts++, I think we can just skip attach_pid() calls and job control inits for idle threads and kill unhash_process(). We don't need to cleanup ->proc_dentry in fork_idle() because with this patch idle threads are never hashed in kernel/pid.c:pid_hash[]. We don't need to hash pid == 0 in pidmap_init(). free_pidmap() is never called with pid == 0 arg, so it will never be reused. So it is still possible to use pid == 0 in any PIDTYPE_xxx namespace from kernel/pid.c's POV. However with this patch we don't hash pid == 0 for PIDTYPE_PID case. We still have have PIDTYPE_PGID/PIDTYPE_SID entries with pid == 0: /sbin/init and kernel threads which don't call daemonize(). Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1f16fb1fea2..ddc0df7f8bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1214,8 +1214,6 @@ static inline int thread_group_empty(task_t *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern void unhash_process(struct task_struct *p); - /* * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also -- cgit v1.2.3 From c7c6464117a02b0d54feb4ebeca4db70fa493678 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:09 -0800 Subject: [PATCH] pidhash: don't use zero pids daemonize() calls set_special_pids(1,1), while init and kernel threads spawned from init/main.c:init() run with 0,0 special pids. This patch changes INIT_SIGNALS() so that that they run with ->pgrp == ->session == 1 also. This patch relies on fact that swapper's pid == 1. Now we have no hashed zero pids in pid_hash[]. User-space visibible change is that now /sbin/init runs with (1,1) special pids and becomes a session leader. Quoting Eric W. Biederman: > > daemonize consuming pids (1,1) then consumes pgrp 1. So that when > /sbin/init calls setsid() it thinks /sbin/init is a process group > leader and setsid() fails. So /sbin/init wants pgrp 1 session 1 > but doesn't get it. I am pretty certain daemonize did not exist so > /sbin/init got pgrp 1 session 1 in 2.4. > > That is the bug that is being fixed. > > This patch takes things one step farther and essentially calls > setsid() for pid == 1 before init is execed. That is new behavior > but it cleans up the kernel as we now do not need to support the > case of a process without a process group or a session. > > The only process that could have possibly cared was /sbin/init > and it already calls setsid() because it doesn't want that. > > If this was going to break anything noticeable the change in behavior > from 2.4 to 2.6 would have already done that. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 92146f3b742..41ecbb847f3 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -62,6 +62,8 @@ .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ + .pgrp = 1, \ + .session = 1, \ } #define INIT_SIGHAND(sighand) { \ -- cgit v1.2.3 From aa1757f90bea3f598b6e5d04d922a6a60200f1da Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:12 -0800 Subject: [PATCH] convert sighand_cache to use SLAB_DESTROY_BY_RCU This patch borrows a clever Hugh's 'struct anon_vma' trick. Without tasklist_lock held we can't trust task->sighand until we locked it and re-checked that it is still the same. But this means we don't need to defer 'kmem_cache_free(sighand)'. We can return the memory to slab immediately, all we need is to be sure that sighand->siglock can't dissapear inside rcu protected section. To do so we need to initialize ->siglock inside ctor function, SLAB_DESTROY_BY_RCU does the rest. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ddc0df7f8bf..bbcfc873bd9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -355,16 +355,8 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct rcu_head rcu; }; -extern void sighand_free_cb(struct rcu_head *rhp); - -static inline void sighand_free(struct sighand_struct *sp) -{ - call_rcu(&sp->rcu, sighand_free_cb); -} - /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always -- cgit v1.2.3 From f63ee72e0fb82e504a0489490babc7612c7cd6c2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:13 -0800 Subject: [PATCH] introduce lock_task_sighand() helper Add lock_task_sighand() helper and converts group_send_sig_info() to use it. Hopefully we will have more users soon. This patch also removes '!sighand->count' and '!p->usage' checks, I think they both are bogus, racy and unneeded (but probably it makes sense to restore them as BUG_ON()s). ->sighand is cleared and it's ->count is decremented in release_task() with sighand->siglock held, so it is a bug to have '!p->usage || !->count' after we already locked and verified it is the same. On the other hand, an already dead task without ->sighand can have a non-zero ->usage due to ptrace, for example. If we read the stale value of ->sighand we must see the change after spin_lock(), because that change was done while holding that same old ->sighand.siglock. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bbcfc873bd9..ca1fd31aae9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1225,6 +1225,15 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } +extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + #ifndef __HAVE_THREAD_FUNCTIONS #define task_thread_info(task) (task)->thread_info -- cgit v1.2.3 From 7001510d0cbf51ad202dd2d0744f54104285cbb9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:14 -0800 Subject: [PATCH] copy_process: cleanup bad_fork_cleanup_sighand The only caller of exit_sighand(tsk) is copy_process's error path. We can call __exit_sighand() directly and kill exit_sighand(). This 'tsk' was not yet registered in pid_hash[] or init_task.tasks, it has no external references, nobody can see it, and IF (clone_flags & CLONE_SIGHAND) At least 'current' has a reference to ->sighand, this means atomic_dec_and_test(sighand->count) can't be true. ELSE Nobody can see this ->sighand, this means we can free it without any locking. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ca1fd31aae9..69c2a1e1529 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void exit_signal(struct task_struct *); extern void __exit_signal(struct task_struct *); -extern void exit_sighand(struct task_struct *); extern void __exit_sighand(struct task_struct *); extern void exit_itimers(struct signal_struct *); -- cgit v1.2.3 From 6b3934ef52712ece50605dfc72e55d00c580831a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:16 -0800 Subject: [PATCH] copy_process: cleanup bad_fork_cleanup_signal __exit_signal() does important cleanups atomically under ->siglock. It is also called from copy_process's error path. This is not good, for example we can't move __unhash_process() under ->siglock for that reason. We should not mix these 2 paths, just look at ugly 'if (p->sighand)' under 'bad_fork_cleanup_sighand:' label. For copy_process() case it is sufficient to just backout copy_signal(), nothing more. Again, nobody can see this task yet. For CLONE_THREAD case we just decrement signal->count, otherwise nobody can see this ->signal and we can free it lockless. This patch assumes it is safe to do exit_thread_group_keys() without tasklist_lock. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: David Howells Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- include/linux/slab.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 69c2a1e1529..7dd430b697a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1149,7 +1149,7 @@ extern void flush_thread(void); extern void exit_thread(void); extern void exit_files(struct task_struct *); -extern void exit_signal(struct task_struct *); +extern void __cleanup_signal(struct signal_struct *); extern void __exit_signal(struct task_struct *); extern void __exit_sighand(struct task_struct *); extern void exit_itimers(struct signal_struct *); diff --git a/include/linux/slab.h b/include/linux/slab.h index 15e1d9736b1..3af03b19c98 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -210,7 +210,6 @@ extern kmem_cache_t *names_cachep; extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; -extern kmem_cache_t *signal_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; -- cgit v1.2.3 From c81addc9d3a0ebff2155e0cd86f90820ab97147e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:17 -0800 Subject: [PATCH] rename __exit_sighand to cleanup_sighand Cosmetic, rename __exit_sighand to cleanup_sighand and move it close to copy_sighand(). This matches copy_signal/cleanup_signal naming, and I think it is easier to follow. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7dd430b697a..921148277da 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1150,8 +1150,8 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); +extern void cleanup_sighand(struct task_struct *); extern void __exit_signal(struct task_struct *); -extern void __exit_sighand(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); -- cgit v1.2.3 From 6a14c5c9da0b4c34b5be783403c54f0396fcfe77 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:18 -0800 Subject: [PATCH] move __exit_signal() to kernel/exit.c __exit_signal() is private to release_task() now. I think it is better to make it static in kernel/exit.c and export flush_sigqueue() instead - this function is much more simple and straightforward. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - include/linux/signal.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 921148277da..a913fca9e70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); extern void cleanup_sighand(struct task_struct *); -extern void __exit_signal(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); diff --git a/include/linux/signal.h b/include/linux/signal.h index b7d093520bb..162a8fd10b2 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -249,6 +249,8 @@ static inline void init_sigpending(struct sigpending *sig) INIT_LIST_HEAD(&sig->list); } +extern void flush_sigqueue(struct sigpending *queue); + /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { -- cgit v1.2.3 From 47e65328a7b1cdfc4e3102e50d60faf94ebba7d3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:25 -0800 Subject: [PATCH] pids: kill PIDTYPE_TGID This patch kills PIDTYPE_TGID pid_type thus saving one hash table in kernel/pid.c and speeding up subthreads create/destroy a bit. It is also a preparation for the further tref/pids rework. This patch adds 'struct list_head thread_group' to 'struct task_struct' instead. We don't detach group leader from PIDTYPE_PID namespace until another thread inherits it's ->pid == ->tgid, so we are safe wrt premature free_pidmap(->tgid) call. Currently there are no users of find_task_by_pid_type(PIDTYPE_TGID). Should the need arise, we can use find_task_by_pid()->group_leader. Signed-off-by: Oleg Nesterov Acked-By: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 1 - include/linux/sched.h | 11 ++++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 099e70ecf7c..5b9082cc600 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -4,7 +4,6 @@ enum pid_type { PIDTYPE_PID, - PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX diff --git a/include/linux/sched.h b/include/linux/sched.h index a913fca9e70..99855f694eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -752,6 +752,7 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid pids[PIDTYPE_MAX]; + struct list_head thread_group; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -1192,13 +1193,17 @@ extern void wait_task_inactive(task_t * p); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) -extern task_t * FASTCALL(next_thread(const task_t *p)); - #define thread_group_leader(p) (p->pid == p->tgid) +static inline task_t *next_thread(task_t *p) +{ + return list_entry(rcu_dereference(p->thread_group.next), + task_t, thread_group); +} + static inline int thread_group_empty(task_t *p) { - return list_empty(&p->pids[PIDTYPE_TGID].pid_list); + return list_empty(&p->thread_group); } #define delay_group_leader(p) \ -- cgit v1.2.3 From a7e5328a06a2beee3a2bbfaf87ce2a7bbe937de1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:27 -0800 Subject: [PATCH] cleanup __exit_signal->cleanup_sighand path Move 'tsk->sighand = NULL' from cleanup_sighand() to __exit_signal(). This makes the exit path more understandable and allows us to do cleanup_sighand() outside of ->siglock protected section. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 99855f694eb..d04186d8cc6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,7 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); -extern void cleanup_sighand(struct task_struct *); +extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); -- cgit v1.2.3