From 864d7c4c068f23642efe91b33be3a84afe5f71e0 Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Sun, 26 Apr 2009 20:25:56 +1000 Subject: fs: move mark_files_ro into file_table.c This function walks the s_files lock, and operates primarily on the files in a superblock, so it better belongs here (eg. see also fs_may_remount_ro). [AV: ... and it shouldn't be static after that move] Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- fs/super.c | 39 --------------------------------------- 1 file changed, 39 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 1943fdf655f..c170551c23f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -615,45 +615,6 @@ out: return err; } -/** - * mark_files_ro - mark all files read-only - * @sb: superblock in question - * - * All files are marked read-only. We don't care about pending - * delete files so this should be used in 'force' mode only. - */ - -static void mark_files_ro(struct super_block *sb) -{ - struct file *f; - -retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { - struct vfsmount *mnt; - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - f->f_mode &= ~FMODE_WRITE; - if (file_check_writeable(f) != 0) - continue; - file_release_write(f); - mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ - mnt_drop_write(mnt); - mntput(mnt); - goto retry; - } - file_list_unlock(); -} - /** * do_remount_sb - asks filesystem to change mount options. * @sb: superblock in question -- cgit v1.2.3 From 876a9f76abbcb775f8d21cbc99fa161f9e5937f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Apr 2009 18:05:55 +0200 Subject: remove s_async_list Remove the unused s_async_list in the superblock, a leftover of the broken async inode deletion code that leaked into mainline. Having this in the middle of the sync/unmount path is not helpful for the following cleanups. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/super.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index c170551c23f..3d9f117dd2a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include "internal.h" @@ -72,7 +71,6 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_async_list); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -342,11 +340,6 @@ void generic_shutdown_super(struct super_block *sb) lock_super(sb); sb->s_flags &= ~MS_ACTIVE; - /* - * wait for asynchronous fs operations to finish before going further - */ - async_synchronize_full_domain(&sb->s_async_list); - /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); lock_kernel(); @@ -517,7 +510,6 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - async_synchronize_full_domain(&sb->s_async_list); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); up_read(&sb->s_umount); -- cgit v1.2.3 From 5a3e5cb8e08bd876e2542c1451c9a93dab1b0e39 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:48 +0200 Subject: vfs: Fix sys_sync() and fsync_super() reliability (version 4) So far, do_sync() called: sync_inodes(0); sync_supers(); sync_filesystems(0); sync_filesystems(1); sync_inodes(1); This ordering makes it kind of hard for filesystems as sync_inodes(0) need not submit all the IO (for example it skips inodes with I_SYNC set) so e.g. forcing transaction to disk in ->sync_fs() is not really enough. Therefore sys_sync has not been completely reliable on some filesystems (ext3, ext4, reiserfs, ocfs2 and others are hit by this) when racing e.g. with background writeback. A similar problem hits also other filesystems (e.g. ext2) because of write_supers() being called before the sync_inodes(1). Change the ordering of calls in do_sync() - this requires a new function sync_blockdevs() to preserve the property that block devices are always synced after write_super() / sync_fs() call. The same issue is fixed in __fsync_super() function used on umount / remount read-only. [AV: build fixes] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 3d9f117dd2a..18d159dc1e4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -293,6 +293,7 @@ void __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); vfs_dq_sync(sb); + sync_inodes_sb(sb, 1); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); @@ -300,7 +301,6 @@ void __fsync_super(struct super_block *sb) if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); } /* @@ -522,6 +522,33 @@ restart: mutex_unlock(&mutex); } +#ifdef CONFIG_BLOCK +/* + * Sync all block devices underlying some superblock + */ +void sync_blockdevs(void) +{ + struct super_block *sb; + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_bdev) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) + sync_blockdev(sb->s_bdev); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); +} +#endif + /** * get_super - get the superblock of a device * @bdev: device to get the superblock for -- cgit v1.2.3 From bfe881255c74800147523b59c85328a1a826ba21 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:49 +0200 Subject: vfs: Call ->sync_fs() even if s_dirt is 0 (version 4) sync_filesystems() has a condition that if wait == 0 and s_dirt == 0, then ->sync_fs() isn't called. This does not really make much sence since s_dirt is generally used by a filesystem to mean that ->write_super() needs to be called. But ->sync_fs() does different things. I even suspect that some filesystems (btrfs?) sets s_dirt just to fool this logic. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 18d159dc1e4..fae91ba38e4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -510,7 +510,7 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && (wait || sb->s_dirt)) + if (sb->s_root) sb->s_op->sync_fs(sb, wait); up_read(&sb->s_umount); /* restart only when sb is no longer on the list */ -- cgit v1.2.3 From 429479f031322a0cc5c921ffb2321a51718dc875 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:50 +0200 Subject: vfs: Make __fsync_super() a static function (version 4) __fsync_super() does the same thing as fsync_super(). So change the only caller to use fsync_super() and make __fsync_super() static. This removes unnecessarily duplicated call to sync_blockdev() and prepares ground for the changes to __fsync_super() in the following patches. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index fae91ba38e4..8dbe1ead9dd 100644 --- a/fs/super.c +++ b/fs/super.c @@ -289,7 +289,7 @@ EXPORT_SYMBOL(unlock_super); * device. Takes the superblock lock. Requires a second blkdev * flush by the caller to complete the operation. */ -void __fsync_super(struct super_block *sb) +static int __fsync_super(struct super_block *sb) { sync_inodes_sb(sb, 0); vfs_dq_sync(sb); @@ -300,7 +300,7 @@ void __fsync_super(struct super_block *sb) unlock_super(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + return sync_blockdev(sb->s_bdev); } /* @@ -310,8 +310,7 @@ void __fsync_super(struct super_block *sb) */ int fsync_super(struct super_block *sb) { - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); + return __fsync_super(sb); } EXPORT_SYMBOL_GPL(fsync_super); -- cgit v1.2.3 From 5cee5815d1564bbbd505fea86f4550f1efdb5cd0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:51 +0200 Subject: vfs: Make sys_sync() use fsync_super() (version 4) It is unnecessarily fragile to have two places (fsync_super() and do_sync()) doing data integrity sync of the filesystem. Alter __fsync_super() to accommodate needs of both callers and use it. So after this patch __fsync_super() is the only place where we gather all the calls needed to properly send all data on a filesystem to disk. Nice bonus is that we get a complete livelock avoidance and write_supers() is now only used for periodic writeback of superblocks. sync_blockdevs() introduced a couple of patches ago is gone now. [build fixes folded] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 72 ++++++++++++++++++++------------------------------------------ 1 file changed, 23 insertions(+), 49 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 8dbe1ead9dd..c8ce5ed0424 100644 --- a/fs/super.c +++ b/fs/super.c @@ -284,23 +284,23 @@ EXPORT_SYMBOL(lock_super); EXPORT_SYMBOL(unlock_super); /* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. Requires a second blkdev - * flush by the caller to complete the operation. + * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) + * just dirties buffers with inodes so we have to submit IO for these buffers + * via __sync_blockdev(). This also speeds up the wait == 1 case since in that + * case write_inode() functions do sync_dirty_buffer() and thus effectively + * write one block at a time. */ -static int __fsync_super(struct super_block *sb) +static int __fsync_super(struct super_block *sb, int wait) { - sync_inodes_sb(sb, 0); vfs_dq_sync(sb); - sync_inodes_sb(sb, 1); + sync_inodes_sb(sb, wait); lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - return sync_blockdev(sb->s_bdev); + sb->s_op->sync_fs(sb, wait); + return __sync_blockdev(sb->s_bdev, wait); } /* @@ -310,7 +310,12 @@ static int __fsync_super(struct super_block *sb) */ int fsync_super(struct super_block *sb) { - return __fsync_super(sb); + int ret; + + ret = __fsync_super(sb, 0); + if (ret < 0) + return ret; + return __fsync_super(sb, 1); } EXPORT_SYMBOL_GPL(fsync_super); @@ -469,20 +474,18 @@ restart: } /* - * Call the ->sync_fs super_op against all filesystems which are r/w and - * which implement it. + * Sync all the data for all the filesystems (called by sys_sync() and + * emergency sync) * * This operation is careful to avoid the livelock which could easily happen - * if two or more filesystems are being continuously dirtied. s_need_sync_fs + * if two or more filesystems are being continuously dirtied. s_need_sync * is used only here. We set it against all filesystems and then clear it as * we sync them. So redirtied filesystems are skipped. * * But if process A is currently running sync_filesystems and then process B - * calls sync_filesystems as well, process B will set all the s_need_sync_fs + * calls sync_filesystems as well, process B will set all the s_need_sync * flags again, which will cause process A to resync everything. Fix that with * a local mutex. - * - * (Fabian) Avoid sync_fs with clean fs & wait mode 0 */ void sync_filesystems(int wait) { @@ -492,25 +495,23 @@ void sync_filesystems(int wait) mutex_lock(&mutex); /* Could be down_interruptible */ spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_op->sync_fs) - continue; if (sb->s_flags & MS_RDONLY) continue; - sb->s_need_sync_fs = 1; + sb->s_need_sync = 1; } restart: list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_need_sync_fs) + if (!sb->s_need_sync) continue; - sb->s_need_sync_fs = 0; + sb->s_need_sync = 0; if (sb->s_flags & MS_RDONLY) continue; /* hm. Was remounted r/o meanwhile */ sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); if (sb->s_root) - sb->s_op->sync_fs(sb, wait); + __fsync_super(sb, wait); up_read(&sb->s_umount); /* restart only when sb is no longer on the list */ spin_lock(&sb_lock); @@ -521,33 +522,6 @@ restart: mutex_unlock(&mutex); } -#ifdef CONFIG_BLOCK -/* - * Sync all block devices underlying some superblock - */ -void sync_blockdevs(void) -{ - struct super_block *sb; - - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_bdev) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) - sync_blockdev(sb->s_bdev); - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); -} -#endif - /** * get_super - get the superblock of a device * @bdev: device to get the superblock for -- cgit v1.2.3 From c15c54f5f056ee4819da9fde59a5f2cd45445f23 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:52 +0200 Subject: vfs: Move syncing code from super.c to sync.c (version 4) Move sync_filesystems(), __fsync_super(), fsync_super() from super.c to sync.c where it fits better. [build fixes folded] Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 85 -------------------------------------------------------------- 1 file changed, 85 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index c8ce5ed0424..f822c74f25b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -283,42 +283,6 @@ void unlock_super(struct super_block * sb) EXPORT_SYMBOL(lock_super); EXPORT_SYMBOL(unlock_super); -/* - * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) - * just dirties buffers with inodes so we have to submit IO for these buffers - * via __sync_blockdev(). This also speeds up the wait == 1 case since in that - * case write_inode() functions do sync_dirty_buffer() and thus effectively - * write one block at a time. - */ -static int __fsync_super(struct super_block *sb, int wait) -{ - vfs_dq_sync(sb); - sync_inodes_sb(sb, wait); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, wait); - return __sync_blockdev(sb->s_bdev, wait); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - int ret; - - ret = __fsync_super(sb, 0); - if (ret < 0) - return ret; - return __fsync_super(sb, 1); -} -EXPORT_SYMBOL_GPL(fsync_super); - /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill @@ -473,55 +437,6 @@ restart: spin_unlock(&sb_lock); } -/* - * Sync all the data for all the filesystems (called by sys_sync() and - * emergency sync) - * - * This operation is careful to avoid the livelock which could easily happen - * if two or more filesystems are being continuously dirtied. s_need_sync - * is used only here. We set it against all filesystems and then clear it as - * we sync them. So redirtied filesystems are skipped. - * - * But if process A is currently running sync_filesystems and then process B - * calls sync_filesystems as well, process B will set all the s_need_sync - * flags again, which will cause process A to resync everything. Fix that with - * a local mutex. - */ -void sync_filesystems(int wait) -{ - struct super_block *sb; - static DEFINE_MUTEX(mutex); - - mutex_lock(&mutex); /* Could be down_interruptible */ - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_flags & MS_RDONLY) - continue; - sb->s_need_sync = 1; - } - -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_need_sync) - continue; - sb->s_need_sync = 0; - if (sb->s_flags & MS_RDONLY) - continue; /* hm. Was remounted r/o meanwhile */ - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) - __fsync_super(sb, wait); - up_read(&sb->s_umount); - /* restart only when sb is no longer on the list */ - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); - mutex_unlock(&mutex); -} - /** * get_super - get the superblock of a device * @bdev: device to get the superblock for -- cgit v1.2.3 From 60b0680fa236ac4e17ce31a50048c9d75f9ec831 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 27 Apr 2009 16:43:53 +0200 Subject: vfs: Rename fsync_super() to sync_filesystem() (version 4) Rename the function so that it better describe what it really does. Also remove the unnecessary include of buffer_head.h. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index f822c74f25b..721236e6177 100644 --- a/fs/super.c +++ b/fs/super.c @@ -28,7 +28,6 @@ #include #include #include -#include /* for fsync_super() */ #include #include #include @@ -304,7 +303,7 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); - fsync_super(sb); + sync_filesystem(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; @@ -543,7 +542,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); - fsync_super(sb); + sync_filesystem(sb); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ -- cgit v1.2.3 From 8c85e125124a473d6f3e9bb187b0b84207f81d91 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Apr 2009 18:00:26 +0200 Subject: remove ->write_super call in generic_shutdown_super We just did a full fs writeout using sync_filesystem before, and if that's not enough for the filesystem it can perform it's own writeout in ->put_super, which many filesystems already do. Move a call to foofs_write_super into every foofs_put_super for now to guarantee identical behaviour until it's cleaned up by the individual filesystem maintainers. Exceptions: - affs already has identical copy & pasted code at the beginning of affs_put_super so no need to do it twice. - xfs does the right thing without it and I have changes pending for the xfs tree touching this are so I don't really need conflicts here.. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 721236e6177..d9a29d5b1d2 100644 --- a/fs/super.c +++ b/fs/super.c @@ -311,8 +311,6 @@ void generic_shutdown_super(struct super_block *sb) invalidate_inodes(sb); lock_kernel(); - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); if (sop->put_super) sop->put_super(sb); -- cgit v1.2.3 From e5004753388dcf5e1b8a52ac0ab807d232340fbb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 May 2009 16:08:56 +0200 Subject: cleanup sync_supers Merge the write_super helper into sync_super and move the check for ->write_super earlier so that we can avoid grabbing a reference to a superblock that doesn't have it. While we're at it also add a little comment documenting sync_supers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/super.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index d9a29d5b1d2..cb19fffc768 100644 --- a/fs/super.c +++ b/fs/super.c @@ -399,16 +399,14 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); -static inline void write_super(struct super_block *sb) -{ - lock_super(sb); - if (sb->s_root && sb->s_dirt) - if (sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); -} - -/* +/** + * sync_supers - helper for periodic superblock writeback + * + * Call the write_super method if present on all dirty superblocks in + * the system. This is for the periodic writeback used by most older + * filesystems. For data integrity superblock writeback use + * sync_filesystems() instead. + * * Note: check the dirty flag before waiting, so we don't * hold up the sync while mounting a device. (The newly * mounted device won't need syncing.) @@ -420,12 +418,17 @@ void sync_supers(void) spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_dirt) { + if (sb->s_op->write_super && sb->s_dirt) { sb->s_count++; spin_unlock(&sb_lock); + down_read(&sb->s_umount); - write_super(sb); + lock_super(sb); + if (sb->s_root && sb->s_dirt) + sb->s_op->write_super(sb); + unlock_super(sb); up_read(&sb->s_umount); + spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) goto restart; -- cgit v1.2.3 From 443b94baaa16771e98b29ca7c24f1e305738ffca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 May 2009 23:48:50 -0400 Subject: Make sure that all callers of remount hold s_umount exclusive Signed-off-by: Al Viro --- fs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index cb19fffc768..49f670cb9a8 100644 --- a/fs/super.c +++ b/fs/super.c @@ -579,7 +579,7 @@ static void do_emergency_remount(struct work_struct *work) list_for_each_entry(sb, &super_blocks, s_list) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { /* * ->remount_fs needs lock_kernel(). @@ -590,7 +590,8 @@ static void do_emergency_remount(struct work_struct *work) do_remount_sb(sb, MS_RDONLY, NULL, 1); unlock_kernel(); } - drop_super(sb); + up_write(&sb->s_umount); + put_super(sb); spin_lock(&sb_lock); } spin_unlock(&sb_lock); -- cgit v1.2.3 From a9e220f8322e2b0e0b8903fe00265461cffad3f0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 May 2009 22:10:44 -0400 Subject: No need to do lock_super() for exclusion in generic_shutdown_super() We can't run into contention on it. All other callers of lock_super() either hold s_umount (and we have it exclusive) or hold an active reference to superblock in question, which prevents the call of generic_shutdown_super() while the reference is held. So we can replace lock_super(s) with get_fs_excl() in generic_shutdown_super() (and corresponding change for unlock_super(), of course). Since ext4 expects s_lock held for its put_super, take lock_super() into it. The rest of filesystems do not care at all. Signed-off-by: Al Viro --- fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 49f670cb9a8..54fd331f0ca 100644 --- a/fs/super.c +++ b/fs/super.c @@ -304,7 +304,7 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); sync_filesystem(sb); - lock_super(sb); + get_fs_excl(); sb->s_flags &= ~MS_ACTIVE; /* bad name - it should be evict_inodes() */ @@ -322,7 +322,7 @@ void generic_shutdown_super(struct super_block *sb) } unlock_kernel(); - unlock_super(sb); + put_fs_excl(); } spin_lock(&sb_lock); /* should be initialized for __put_super_and_need_restart() */ -- cgit v1.2.3 From 6cfd0148425e528b859b26e436b01f23f6926224 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 May 2009 15:40:36 +0200 Subject: push BKL down into ->put_super Move BKL into ->put_super from the only caller. A couple of filesystems had trivial enough ->put_super (only kfree and NULLing of s_fs_info + stuff in there) to not get any locking: coda, cramfs, efs, hugetlbfs, omfs, qnx4, shmem, all others got the full treatment. Most of them probably don't need it, but I'd rather sort that out individually. Preferably after all the other BKL pushdowns in that area. [AV: original used to move lock_super() down as well; these changes are removed since we don't do lock_super() at all in generic_shutdown_super() now] [AV: fuse, btrfs and xfs are known to need no damn BKL, exempt] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/super.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 54fd331f0ca..bdd7158b785 100644 --- a/fs/super.c +++ b/fs/super.c @@ -309,7 +309,6 @@ void generic_shutdown_super(struct super_block *sb) /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); - lock_kernel(); if (sop->put_super) sop->put_super(sb); @@ -320,8 +319,6 @@ void generic_shutdown_super(struct super_block *sb) "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } - - unlock_kernel(); put_fs_excl(); } spin_lock(&sb_lock); -- cgit v1.2.3 From bbd6851a3213a525128473e978b692ab6ac11aba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 10:43:07 -0400 Subject: Push lock_super() into the ->remount_fs() of filesystems that care about it Note that since we can't run into contention between remount_fs and write_super (due to exclusion on s_umount), we have to care only about filesystems that touch lock_super() on their own. Out of those ext3, ext4, hpfs, sysv and ufs do need it; fat doesn't since its ->remount_fs() only accesses assign-once data (basically, it's "we have no atime on directories and only have atime on files for vfat; force nodiratime and possibly noatime into *flags"). [folded a build fix from hch] Signed-off-by: Al Viro --- fs/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index bdd7158b785..2a49fed7745 100644 --- a/fs/super.c +++ b/fs/super.c @@ -556,9 +556,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { - lock_super(sb); retval = sb->s_op->remount_fs(sb, &flags, data); - unlock_super(sb); if (retval) return retval; } -- cgit v1.2.3 From 4aa98cf768b6f2ea4b204620d949a665959214f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 May 2009 13:36:58 -0400 Subject: Push BKL down into do_remount_sb() [folded fix from Jiri Slaby] Signed-off-by: Al Viro --- fs/super.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 2a49fed7745..a64f3620879 100644 --- a/fs/super.c +++ b/fs/super.c @@ -542,25 +542,33 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) shrink_dcache_sb(sb); sync_filesystem(sb); + lock_kernel(); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) + else if (!fs_may_remount_ro(sb)) { + unlock_kernel(); return -EBUSY; + } retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) + if (retval < 0 && retval != -ENOSYS) { + unlock_kernel(); return -EBUSY; + } } remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); - if (retval) + if (retval) { + unlock_kernel(); return retval; + } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); + unlock_kernel(); if (remount_rw) vfs_dq_quota_on_remount(sb); return 0; @@ -581,9 +589,7 @@ static void do_emergency_remount(struct work_struct *work) * * What lock protects sb->s_flags?? */ - lock_kernel(); do_remount_sb(sb, MS_RDONLY, NULL, 1); - unlock_kernel(); } up_write(&sb->s_umount); put_super(sb); -- cgit v1.2.3 From ebc1ac164560a241d9bf1b7519062910c3f90a01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 May 2009 23:35:03 +0200 Subject: ->write_super lock_super pushdown Push down lock_super into ->write_super instances and remove it from the caller. Following filesystem don't need ->s_lock in ->write_super and are skipped: * bfs, nilfs2 - no other uses of s_lock and have internal locks in ->write_super * ext2 - uses BKL in ext2_write_super and has internal calls without s_lock * reiserfs - no other uses of s_lock as has reiserfs_write_lock (BKL) in ->write_super * xfs - no other uses of s_lock and uses internal lock (buffer lock on superblock buffer) to serialize ->write_super. Also xfs_fs_write_super is superflous and will go away in the next merge window Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index a64f3620879..1905f4af01c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -420,10 +420,8 @@ restart: spin_unlock(&sb_lock); down_read(&sb->s_umount); - lock_super(sb); if (sb->s_root && sb->s_dirt) sb->s_op->write_super(sb); - unlock_super(sb); up_read(&sb->s_umount); spin_lock(&sb_lock); -- cgit v1.2.3 From 337eb00a2c3a421999c39c94ce7e33545ee8baa7 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Tue, 12 May 2009 15:10:54 +0200 Subject: Push BKL down into ->remount_fs() [xfs, btrfs, capifs, shmem don't need BKL, exempt] Signed-off-by: Alessio Igor Bogani Signed-off-by: Al Viro --- fs/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/super.c') diff --git a/fs/super.c b/fs/super.c index 1905f4af01c..83b47416d00 100644 --- a/fs/super.c +++ b/fs/super.c @@ -540,7 +540,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) shrink_dcache_sb(sb); sync_filesystem(sb); - lock_kernel(); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { @@ -566,7 +565,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); - unlock_kernel(); if (remount_rw) vfs_dq_quota_on_remount(sb); return 0; -- cgit v1.2.3