From 3959214f971417f4162926ac52ad4cd042958caa Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 24 Mar 2009 15:43:30 +0100 Subject: sched: delayed cleanup of user_struct During bootup performance tracing we see repeated occurrences of /sys/kernel/uid/* events for the same uid, leading to a, in this case, rather pointless userspace processing for the same uid over and over. This is usually caused by tools which change their uid to "nobody", to run without privileges to read data supplied by untrusted users. This change delays the execution of the (already existing) scheduled work, to cleanup the uid after one second, so the allocated and announced uid can possibly be re-used by another process. This is the current behavior, where almost every invocation of a binary, which changes the uid, creates two events: $ read START < /sys/kernel/uevent_seqnum; \ for i in `seq 100`; do su --shell=/bin/true bin; done; \ read END < /sys/kernel/uevent_seqnum; \ echo $(($END - $START)) 178 With the delayed cleanup, we get only two events, and userspace finishes a bit faster too: $ read START < /sys/kernel/uevent_seqnum; \ for i in `seq 100`; do su --shell=/bin/true bin; done; \ read END < /sys/kernel/uevent_seqnum; \ echo $(($END - $START)) 1 Acked-by: Dhaval Giani Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/user.c | 67 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/user.c b/kernel/user.c index 850e0ba41c1..2c000e7132a 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -75,21 +75,6 @@ static void uid_hash_remove(struct user_struct *up) put_user_ns(up->user_ns); } -static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) -{ - struct user_struct *user; - struct hlist_node *h; - - hlist_for_each_entry(user, h, hashent, uidhash_node) { - if (user->uid == uid) { - atomic_inc(&user->__count); - return user; - } - } - - return NULL; -} - #ifdef CONFIG_USER_SCHED static void sched_destroy_user(struct user_struct *up) @@ -119,6 +104,23 @@ static int sched_create_user(struct user_struct *up) { return 0; } #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) +static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) +{ + struct user_struct *user; + struct hlist_node *h; + + hlist_for_each_entry(user, h, hashent, uidhash_node) { + if (user->uid == uid) { + /* possibly resurrect an "almost deleted" object */ + if (atomic_inc_return(&user->__count) == 1) + cancel_delayed_work(&user->work); + return user; + } + } + + return NULL; +} + static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ static DEFINE_MUTEX(uids_mutex); @@ -283,12 +285,12 @@ int __init uids_sysfs_init(void) return uids_user_create(&root_user); } -/* work function to remove sysfs directory for a user and free up +/* delayed work function to remove sysfs directory for a user and free up * corresponding structures. */ static void cleanup_user_struct(struct work_struct *w) { - struct user_struct *up = container_of(w, struct user_struct, work); + struct user_struct *up = container_of(w, struct user_struct, work.work); unsigned long flags; int remove_user = 0; @@ -297,15 +299,12 @@ static void cleanup_user_struct(struct work_struct *w) */ uids_mutex_lock(); - local_irq_save(flags); - - if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { + spin_lock_irqsave(&uidhash_lock, flags); + if (atomic_read(&up->__count) == 0) { uid_hash_remove(up); remove_user = 1; - spin_unlock_irqrestore(&uidhash_lock, flags); - } else { - local_irq_restore(flags); } + spin_unlock_irqrestore(&uidhash_lock, flags); if (!remove_user) goto done; @@ -331,16 +330,28 @@ done: */ static void free_user(struct user_struct *up, unsigned long flags) { - /* restore back the count */ - atomic_inc(&up->__count); spin_unlock_irqrestore(&uidhash_lock, flags); - - INIT_WORK(&up->work, cleanup_user_struct); - schedule_work(&up->work); + INIT_DELAYED_WORK(&up->work, cleanup_user_struct); + schedule_delayed_work(&up->work, msecs_to_jiffies(1000)); } #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ +static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) +{ + struct user_struct *user; + struct hlist_node *h; + + hlist_for_each_entry(user, h, hashent, uidhash_node) { + if (user->uid == uid) { + atomic_inc(&user->__count); + return user; + } + } + + return NULL; +} + int uids_sysfs_init(void) { return 0; } static inline int uids_user_create(struct user_struct *up) { return 0; } static inline void uids_mutex_lock(void) { } -- cgit v1.2.3 From 156f5a7801195fa2ce44aeeb62d6cf8468f3332a Mon Sep 17 00:00:00 2001 From: GeunSik Lim Date: Tue, 2 Jun 2009 15:01:37 +0900 Subject: debugfs: Fix terminology inconsistency of dir name to mount debugfs filesystem. Many developers use "/debug/" or "/debugfs/" or "/sys/kernel/debug/" directory name to mount debugfs filesystem for ftrace according to ./Documentation/tracers/ftrace.txt file. And, three directory names(ex:/debug/, /debugfs/, /sys/kernel/debug/) is existed in kernel source like ftrace, DRM, Wireless, Documentation, Network[sky2]files to mount debugfs filesystem. debugfs means debug filesystem for debugging easy to use by greg kroah hartman. "/sys/kernel/debug/" name is suitable as directory name of debugfs filesystem. - debugfs related reference: http://lwn.net/Articles/334546/ Fix inconsistency of directory name to mount debugfs filesystem. * From Steven Rostedt - find_debugfs() and tracing_files() in this patch. Signed-off-by: GeunSik Lim Acked-by : Inaky Perez-Gonzalez Reviewed-by : Steven Rostedt Reviewed-by : James Smart CC: Jiri Kosina CC: David Airlie CC: Peter Osterlund CC: Ananth N Mavinakayanahalli CC: Anil S Keshavamurthy CC: Masami Hiramatsu Signed-off-by: Greg Kroah-Hartman --- kernel/trace/Kconfig | 10 +++++----- kernel/trace/trace.c | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 4a13e5a01ce..61071fecc82 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -147,7 +147,7 @@ config IRQSOFF_TRACER disabled by default and can be runtime (re-)started via: - echo 0 > /debugfs/tracing/tracing_max_latency + echo 0 > /sys/kernel/debug/tracing/tracing_max_latency (Note that kernel size and overhead increases with this option enabled. This option and the preempt-off timing option can be @@ -168,7 +168,7 @@ config PREEMPT_TRACER disabled by default and can be runtime (re-)started via: - echo 0 > /debugfs/tracing/tracing_max_latency + echo 0 > /sys/kernel/debug/tracing/tracing_max_latency (Note that kernel size and overhead increases with this option enabled. This option and the irqs-off timing option can be @@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES This tracer profiles all the the likely and unlikely macros in the kernel. It will display the results in: - /debugfs/tracing/profile_annotated_branch + /sys/kernel/debug/tracing/profile_annotated_branch Note: this will add a significant overhead, only turn this on if you need to profile the system's use of these macros. @@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES taken in the kernel is recorded whether it hit or miss. The results will be displayed in: - /debugfs/tracing/profile_branch + /sys/kernel/debug/tracing/profile_branch This option also enables the likely/unlikely profiler. @@ -323,7 +323,7 @@ config STACK_TRACER select KALLSYMS help This special tracer records the maximum stack footprint of the - kernel and displays it in debugfs/tracing/stack_trace. + kernel and displays it in /sys/kernel/debug/tracing/stack_trace. This tracer works by hooking into every function call that the kernel executes, and keeping a maximum stack depth value and diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8acd9b81a5d..c1878bfb2e1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock = /* * Copy the new maximum trace into the separate maximum-trace * structure. (this way the maximum trace is permanently saved, - * for later retrieval via /debugfs/tracing/latency_trace) + * for later retrieval via /sys/kernel/debug/tracing/latency_trace) */ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) @@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = { static const char readme_msg[] = "tracing mini-HOWTO:\n\n" - "# mkdir /debug\n" - "# mount -t debugfs nodev /debug\n\n" - "# cat /debug/tracing/available_tracers\n" + "# mount -t debugfs nodev /sys/kernel/debug\n\n" + "# cat /sys/kernel/debug/tracing/available_tracers\n" "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" - "# cat /debug/tracing/current_tracer\n" + "# cat /sys/kernel/debug/tracing/current_tracer\n" "nop\n" - "# echo sched_switch > /debug/tracing/current_tracer\n" - "# cat /debug/tracing/current_tracer\n" + "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" + "# cat /sys/kernel/debug/tracing/current_tracer\n" "sched_switch\n" - "# cat /debug/tracing/trace_options\n" + "# cat /sys/kernel/debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" - "# echo print-parent > /debug/tracing/trace_options\n" - "# echo 1 > /debug/tracing/tracing_enabled\n" - "# cat /debug/tracing/trace > /tmp/trace.txt\n" - "# echo 0 > /debug/tracing/tracing_enabled\n" + "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" + "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n" + "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" + "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n" ; static ssize_t -- cgit v1.2.3