From ae87221d3ce49d9de1e43756da834fd0bf05a2ad Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Aug 2007 16:11:54 -0700 Subject: sysfs: crash debugging Print the name of the last-accessed sysfs file when we oops, to help track down oopses which occur in sysfs store/read handlers. Because these oopses tend to not leave any trace of the offending code in the stack traces. Cc: Kay Sievers Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/sysfs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index c9e4e5091da..ce8339c70a4 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -19,10 +19,18 @@ #include #include #include +#include #include #include "sysfs.h" +/* used in crash dumps to help with debugging */ +static char last_sysfs_file[PATH_MAX]; +void sysfs_printk_last_file(void) +{ + printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file); +} + /* * There's one sysfs_buffer for each open file and one * sysfs_open_dirent for each sysfs_dirent with one or more open @@ -328,6 +336,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_buffer *buffer; struct sysfs_ops *ops; int error = -EACCES; + char *p; + + p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); + if (p) + memmove(last_sysfs_file, p, strlen(p) + 1); /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active_two(attr_sd)) -- cgit v1.2.3 From f1282c844e86db5a041afa41335b5f9eea6cec0c Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 16 Jul 2008 08:58:04 +1000 Subject: sysfs: Support sysfs_notify from atomic context with new sysfs_notify_dirent Support sysfs_notify from atomic context with new sysfs_notify_dirent sysfs_notify currently takes sysfs_mutex. This means that it cannot be called in atomic context. sysfs_mutex is sometimes held over a malloc (sysfs_rename_dir) so it can block on low memory. In md I want to be able to notify on a sysfs attribute from atomic context, and I don't want to block on low memory because I could be in the writeout path for freeing memory. So: - export the "sysfs_dirent" structure along with sysfs_get, sysfs_put and sysfs_get_dirent so I can get the sysfs_dirent that I want to notify on and hold it in an md structure. - split sysfs_notify_dirent out of sysfs_notify so the sysfs_dirent can be notified on with no blocking (just a spinlock). Signed-off-by: Neil Brown Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 1 + fs/sysfs/file.c | 31 ++++++++++++++++++------------- fs/sysfs/mount.c | 15 +++++++++++++++ fs/sysfs/sysfs.h | 6 ++++-- 4 files changed, 38 insertions(+), 15 deletions(-) (limited to 'fs/sysfs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aedaeba82ae..53bc7fc31af 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -636,6 +636,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, return sd; } +EXPORT_SYMBOL_GPL(sysfs_get_dirent); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, const char *name, struct sysfs_dirent **p_sd) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index ce8339c70a4..d0d79e6b6d1 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -453,6 +453,22 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) return POLLERR|POLLPRI; } +void sysfs_notify_dirent(struct sysfs_dirent *sd) +{ + struct sysfs_open_dirent *od; + + spin_lock(&sysfs_open_dirent_lock); + + od = sd->s_attr.open; + if (od) { + atomic_inc(&od->event); + wake_up_interruptible(&od->poll); + } + + spin_unlock(&sysfs_open_dirent_lock); +} +EXPORT_SYMBOL_GPL(sysfs_notify_dirent); + void sysfs_notify(struct kobject *k, char *dir, char *attr) { struct sysfs_dirent *sd = k->sd; @@ -463,19 +479,8 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr) sd = sysfs_find_dirent(sd, dir); if (sd && attr) sd = sysfs_find_dirent(sd, attr); - if (sd) { - struct sysfs_open_dirent *od; - - spin_lock(&sysfs_open_dirent_lock); - - od = sd->s_attr.open; - if (od) { - atomic_inc(&od->event); - wake_up_interruptible(&od->poll); - } - - spin_unlock(&sysfs_open_dirent_lock); - } + if (sd) + sysfs_notify_dirent(sd); mutex_unlock(&sysfs_mutex); } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 14f0023984d..ab343e371d6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "sysfs.h" @@ -115,3 +116,17 @@ out_err: sysfs_dir_cachep = NULL; goto out; } + +#undef sysfs_get +struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) +{ + return __sysfs_get(sd); +} +EXPORT_SYMBOL_GPL(sysfs_get); + +#undef sysfs_put +void sysfs_put(struct sysfs_dirent *sd) +{ + __sysfs_put(sd); +} +EXPORT_SYMBOL_GPL(sysfs_put); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a5db496f71c..93c6d6b27c4 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -124,7 +124,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd); void sysfs_remove_subdir(struct sysfs_dirent *sd); -static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) +static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) { if (sd) { WARN_ON(!atomic_read(&sd->s_count)); @@ -132,12 +132,14 @@ static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) } return sd; } +#define sysfs_get(sd) __sysfs_get(sd) -static inline void sysfs_put(struct sysfs_dirent *sd) +static inline void __sysfs_put(struct sysfs_dirent *sd) { if (sd && atomic_dec_and_test(&sd->s_count)) release_sysfs_dirent(sd); } +#define sysfs_put(sd) __sysfs_put(sd) /* * inode.c -- cgit v1.2.3 From b31ca3f5dfc89c3f1f654b30f0760d0e2fb15e45 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 12 Sep 2008 11:24:11 +0200 Subject: sysfs: fix deadlock On Thu, Sep 11, 2008 at 10:27:10AM +0200, Ingo Molnar wrote: > and it's working fine on most boxes. One testbox found this new locking > scenario: > > PM: Adding info for No Bus:vcsa7 > EDAC DEBUG: MC0: i82860_check() > > ======================================================= > [ INFO: possible circular locking dependency detected ] > 2.6.27-rc6-tip #1 > ------------------------------------------------------- > X/4873 is trying to acquire lock: > (&bb->mutex){--..}, at: [] mmap+0x40/0xa0 > > but task is already holding lock: > (&mm->mmap_sem){----}, at: [] sys_mmap2+0x8e/0xc0 > > which lock already depends on the new lock. > > > the existing dependency chain (in reverse order) is: > > -> #1 (&mm->mmap_sem){----}: > [] validate_chain+0xa96/0xf50 > [] __lock_acquire+0x2cb/0x5b0 > [] lock_acquire+0x89/0xc0 > [] might_fault+0x6b/0x90 > [] copy_to_user+0x38/0x60 > [] read+0xfb/0x170 > [] vfs_read+0x95/0x110 > [] sys_pread64+0x63/0x80 > [] sysenter_do_call+0x12/0x43 > [] 0xffffffff > > -> #0 (&bb->mutex){--..}: > [] validate_chain+0x6b7/0xf50 > [] __lock_acquire+0x2cb/0x5b0 > [] lock_acquire+0x89/0xc0 > [] __mutex_lock_common+0xab/0x3c0 > [] mutex_lock_nested+0x38/0x50 > [] mmap+0x40/0xa0 > [] mmap_region+0x14e/0x450 > [] do_mmap_pgoff+0x2ef/0x310 > [] sys_mmap2+0xad/0xc0 > [] sysenter_do_call+0x12/0x43 > [] 0xffffffff > > other info that might help us debug this: > > 1 lock held by X/4873: > #0: (&mm->mmap_sem){----}, at: [] sys_mmap2+0x8e/0xc0 > > stack backtrace: > Pid: 4873, comm: X Not tainted 2.6.27-rc6-tip #1 > [] print_circular_bug_tail+0x79/0xc0 > [] validate_chain+0x6b7/0xf50 > [] ? trace_hardirqs_off_caller+0x15/0xb0 > [] __lock_acquire+0x2cb/0x5b0 > [] lock_acquire+0x89/0xc0 > [] ? mmap+0x40/0xa0 > [] __mutex_lock_common+0xab/0x3c0 > [] ? mmap+0x40/0xa0 > [] mutex_lock_nested+0x38/0x50 > [] ? mmap+0x40/0xa0 > [] mmap+0x40/0xa0 > [] mmap_region+0x14e/0x450 > [] ? arch_get_unmapped_area_topdown+0xf8/0x160 > [] do_mmap_pgoff+0x2ef/0x310 > [] sys_mmap2+0xad/0xc0 > [] sysenter_do_call+0x12/0x43 > [] ? __switch_to+0x130/0x220 > ======================= > evbug.c: Event. Dev: input3, Type: 20, Code: 0, Value: 500 > warning: `sudo' uses deprecated v2 capabilities in a way that may be insecure. > > i've attached the config. > > at first sight it looks like a genuine bug in fs/sysfs/bin.c? Yes, it is a real bug by the looks. bin.c takes bb->mutex under mmap_sem when it is mmapped, and then does its copy_*_user under bb->mutex too. Here is a basic fix for the sysfs lor. From: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/bin.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'fs/sysfs') diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 006fc64227d..66f6e58a7e4 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -61,6 +61,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) int size = dentry->d_inode->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); + char *temp; if (size) { if (offs > size) @@ -69,23 +70,33 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) count = size - offs; } + temp = kmalloc(count, GFP_KERNEL); + if (!temp) + return -ENOMEM; + mutex_lock(&bb->mutex); count = fill_read(dentry, bb->buffer, offs, count); - if (count < 0) - goto out_unlock; + if (count < 0) { + mutex_unlock(&bb->mutex); + goto out_free; + } - if (copy_to_user(userbuf, bb->buffer, count)) { + memcpy(temp, bb->buffer, count); + + mutex_unlock(&bb->mutex); + + if (copy_to_user(userbuf, temp, count)) { count = -EFAULT; - goto out_unlock; + goto out_free; } pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); *off = offs + count; - out_unlock: - mutex_unlock(&bb->mutex); + out_free: + kfree(temp); return count; } @@ -118,6 +129,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, int size = dentry->d_inode->i_size; loff_t offs = *off; int count = min_t(size_t, bytes, PAGE_SIZE); + char *temp; if (size) { if (offs > size) @@ -126,19 +138,27 @@ static ssize_t write(struct file *file, const char __user *userbuf, count = size - offs; } - mutex_lock(&bb->mutex); + temp = kmalloc(count, GFP_KERNEL); + if (!temp) + return -ENOMEM; - if (copy_from_user(bb->buffer, userbuf, count)) { + if (copy_from_user(temp, userbuf, count)) { count = -EFAULT; - goto out_unlock; + goto out_free; } + mutex_lock(&bb->mutex); + + memcpy(bb->buffer, temp, count); + count = flush_write(dentry, bb->buffer, offs, count); + mutex_unlock(&bb->mutex); + if (count > 0) *off = offs + count; - out_unlock: - mutex_unlock(&bb->mutex); +out_free: + kfree(temp); return count; } -- cgit v1.2.3 From 45c076c5d71e6e644e2eae64f80922d162c900ac Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 28 Sep 2008 07:48:08 +0900 Subject: sysfs: use ilookup5() instead of ilookup5_nowait() As inode creation is protected by sysfs_mutex, ilookup5_nowait() always either fails to find at all or finds one which is fully initialized, so using ilookup5_nowait() or ilookup5() doesn't make any difference. Switch to ilookup5() as it's planned to be removed. This change also makes lookup return value handling a bit simpler. This change was suggested by Al Viro. Signed-off-by: Tejun Heo Cc: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/sysfs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 53bc7fc31af..c18342641ce 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -370,17 +370,17 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, memset(acxt, 0, sizeof(*acxt)); acxt->parent_sd = parent_sd; - /* Lookup parent inode. inode initialization and I_NEW - * clearing are protected by sysfs_mutex. By grabbing it and - * looking up with _nowait variant, inode state can be - * determined reliably. + /* Lookup parent inode. inode initialization is protected by + * sysfs_mutex, so inode existence can be determined by + * looking up inode while holding sysfs_mutex. */ mutex_lock(&sysfs_mutex); - inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, - parent_sd); + inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, + parent_sd); + if (inode) { + WARN_ON(inode->i_state & I_NEW); - if (inode && !(inode->i_state & I_NEW)) { /* parent inode available */ acxt->parent_inode = inode; @@ -393,8 +393,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, mutex_lock(&inode->i_mutex); mutex_lock(&sysfs_mutex); } - } else - iput(inode); + } } /** -- cgit v1.2.3 From 8c0e3998f5b71e68fe6b6e489a92e052715e563c Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 25 Sep 2008 16:45:13 -0700 Subject: sysfs: Make dir and name args to sysfs_notify() const Because they can be, and because code like this produces a warning if they're not: struct device_attribute dev_attr; sysfs_notify(&kobj, NULL, dev_attr.attr.name); Signed-off-by: Trent Piepho CC: Neil Brown Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/sysfs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d0d79e6b6d1..1f4a3f87726 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -469,7 +469,7 @@ void sysfs_notify_dirent(struct sysfs_dirent *sd) } EXPORT_SYMBOL_GPL(sysfs_notify_dirent); -void sysfs_notify(struct kobject *k, char *dir, char *attr) +void sysfs_notify(struct kobject *k, const char *dir, const char *attr) { struct sysfs_dirent *sd = k->sd; -- cgit v1.2.3 From 0b4a4fea253e1296222603ccc55430ed7cd9413a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 3 Jul 2008 18:05:28 -0700 Subject: kobject: Cleanup kobject_rename and !CONFIG_SYSFS It finally dawned on me what the clean fix to sysfs_rename_dir calling kobject_set_name is. Move the work into kobject_rename where it belongs. The callers serialize us anyway so this is safe. Signed-off-by: Eric W. Biederman Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/sysfs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index c18342641ce..3a05a596e3b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -829,16 +829,12 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) if (!new_dentry) goto out_unlock; - /* rename kobject and sysfs_dirent */ + /* rename sysfs_dirent */ error = -ENOMEM; new_name = dup_name = kstrdup(new_name, GFP_KERNEL); if (!new_name) goto out_unlock; - error = kobject_set_name(kobj, "%s", new_name); - if (error) - goto out_unlock; - dup_name = sd->s_name; sd->s_name = new_name; -- cgit v1.2.3