From 94bebf4d1b8e7719f0f3944c037a21cfd99a4af7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 20 Dec 2006 10:52:44 +0100 Subject: Driver core: fix race in sysfs between sysfs_remove_file() and read()/write() This patch prevents a race between IO and removing a file from sysfs. It introduces a list of sysfs_buffers associated with a file at the inode. Upon removal of a file the list is walked and the buffers marked orphaned. IO to orphaned buffers fails with -ENODEV. The driver can safely free associated data structures or be unloaded. Signed-off-by: Oliver Neukum Acked-by: Maneesh Soni Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'fs/sysfs/inode.c') diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index e79e38d52c0..1a81ebce20e 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "sysfs.h" extern struct super_block * sysfs_sb; @@ -209,6 +210,22 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) return NULL; } +static inline void orphan_all_buffers(struct inode *node) +{ + struct sysfs_buffer_collection *set = node->i_private; + struct sysfs_buffer *buf; + + mutex_lock(&node->i_mutex); + if (node->i_private) { + list_for_each_entry(buf, &set->associates, associates) { + down(&buf->sem); + buf->orphaned = 1; + up(&buf->sem); + } + } + mutex_unlock(&node->i_mutex); +} + /* * Unhashes the dentry corresponding to given sysfs_dirent @@ -217,16 +234,23 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) { struct dentry * dentry = sd->s_dentry; + struct inode *inode; if (dentry) { spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { + inode = dentry->d_inode; + spin_lock(&inode->i_lock); + __iget(inode); + spin_unlock(&inode->i_lock); dget_locked(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, dentry); + orphan_all_buffers(inode); + iput(inode); } else { spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); -- cgit v1.2.3 From d3fc373ac5061cab7a654502b942e7d00e77f733 Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Fri, 5 Jan 2007 12:04:33 -0800 Subject: sysfs: suppress lockdep warnings Lockdep issues the following warning: [ 9.064000] ============================================= [ 9.064000] [ INFO: possible recursive locking detected ] [ 9.064000] 2.6.20-rc3-mm1 #3 [ 9.064000] --------------------------------------------- [ 9.064000] init/1 is trying to acquire lock: [ 9.064000] (&sysfs_inode_imutex_key){--..}, at: [] mutex_lock+0x1c/0x1f [ 9.064000] [ 9.064000] but task is already holding lock: [ 9.064000] (&sysfs_inode_imutex_key){--..}, at: [] mutex_lock+0x1c/0x1f [ 9.065000] [ 9.065000] other info that might help us debug this: [ 9.065000] 2 locks held by init/1: [ 9.065000] #0: (tty_mutex){--..}, at: [] mutex_lock+0x1c/0x1f [ 9.065000] #1: (&sysfs_inode_imutex_key){--..}, at: [] mutex_lock+0x1c/0x1f [ 9.065000] [ 9.065000] stack backtrace: [ 9.065000] [] show_trace_log_lvl+0x1a/0x30 [ 9.066000] [] show_trace+0x12/0x14 [ 9.066000] [] dump_stack+0x16/0x18 [ 9.066000] [] print_deadlock_bug+0xb9/0xc3 [ 9.066000] [] check_deadlock+0x55/0x5a [ 9.066000] [] __lock_acquire+0x371/0xbf0 [ 9.066000] [] lock_acquire+0x69/0x83 [ 9.066000] [] __mutex_lock_slowpath+0x75/0x2d1 [ 9.066000] [] mutex_lock+0x1c/0x1f [ 9.066000] [] sysfs_drop_dentry+0xb1/0x133 [ 9.066000] [] sysfs_hash_and_remove+0xb3/0x142 [ 9.066000] [] sysfs_remove_file+0xd/0x10 [ 9.067000] [] device_remove_file+0x23/0x2e [ 9.067000] [] device_del+0x188/0x1e6 [ 9.067000] [] device_unregister+0xb/0x15 [ 9.067000] [] device_destroy+0x9c/0xa9 [ 9.067000] [] vcs_remove_sysfs+0x1c/0x3b [ 9.067000] [] con_close+0x5e/0x6b [ 9.067000] [] release_dev+0x4c4/0x6e5 [ 9.067000] [] tty_release+0x12/0x1c [ 9.067000] [] __fput+0x177/0x1a0 [ 9.067000] [] fput+0x3b/0x41 [ 9.068000] [] filp_close+0x36/0x65 [ 9.068000] [] sys_close+0x63/0xa4 [ 9.068000] [] sysenter_past_esp+0x5f/0x99 [ 9.068000] ======================= This is due to sysfs_hash_and_remove() holding dir->d_inode->i_mutex before calling sysfs_drop_dentry() which calls orphan_all_buffers() which in turn takes node->i_mutex. Signed-off-by: Frederik Deweerdt Cc: Oliver Neukum Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/sysfs/inode.c') diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 1a81ebce20e..dbd820f9aee 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -215,7 +215,7 @@ static inline void orphan_all_buffers(struct inode *node) struct sysfs_buffer_collection *set = node->i_private; struct sysfs_buffer *buf; - mutex_lock(&node->i_mutex); + mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); if (node->i_private) { list_for_each_entry(buf, &set->associates, associates) { down(&buf->sem); @@ -272,7 +272,7 @@ int sysfs_hash_and_remove(struct dentry * dir, const char * name) return -ENOENT; parent_sd = dir->d_fsdata; - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) continue; -- cgit v1.2.3 From b592fcfe7f06c15ec11774b5be7ce0de3aa86e73 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Jan 2007 12:35:52 -0700 Subject: sysfs: Shadow directory support The problem. When implementing a network namespace I need to be able to have multiple network devices with the same name. Currently this is a problem for /sys/class/net/*. What I want is a separate /sys/class/net directory in sysfs for each network namespace, and I want to name each of them /sys/class/net. I looked and the VFS actually allows that. All that is needed is for /sys/class/net to implement a follow link method to redirect lookups to the real directory you want. Implementing a follow link method that is sensitive to the current network namespace turns out to be 3 lines of code so it looks like a clean approach. Modifying sysfs so it doesn't get in my was is a bit trickier. I am calling the concept of multiple directories all at the same path in the filesystem shadow directories. With the directory entry really at that location the shadow master. The following patch modifies sysfs so it can handle a directory structure slightly different from the kobject tree so I can implement the shadow directories for handling /sys/class/net/. Signed-off-by: Eric W. Biederman Cc: Maneesh Soni Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/sysfs/inode.c') diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index dbd820f9aee..542d2bcc73d 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -33,6 +33,16 @@ static struct inode_operations sysfs_inode_operations ={ .setattr = sysfs_setattr, }; +void sysfs_delete_inode(struct inode *inode) +{ + /* Free the shadowed directory inode operations */ + if (sysfs_is_shadowed_inode(inode)) { + kfree(inode->i_op); + inode->i_op = NULL; + } + return generic_delete_inode(inode); +} + int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) { struct inode * inode = dentry->d_inode; -- cgit v1.2.3