diff options
Diffstat (limited to 'fs/ext3')
-rw-r--r-- | fs/ext3/Kconfig | 32 | ||||
-rw-r--r-- | fs/ext3/acl.c | 8 | ||||
-rw-r--r-- | fs/ext3/acl.h | 4 | ||||
-rw-r--r-- | fs/ext3/dir.c | 3 | ||||
-rw-r--r-- | fs/ext3/file.c | 63 | ||||
-rw-r--r-- | fs/ext3/fsync.c | 12 | ||||
-rw-r--r-- | fs/ext3/inode.c | 63 | ||||
-rw-r--r-- | fs/ext3/namei.c | 4 | ||||
-rw-r--r-- | fs/ext3/super.c | 44 |
9 files changed, 105 insertions, 128 deletions
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index fb3c1a21b13..522b15498f4 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig @@ -29,23 +29,25 @@ config EXT3_FS module will be called ext3. config EXT3_DEFAULTS_TO_ORDERED - bool "Default to 'data=ordered' in ext3 (legacy option)" + bool "Default to 'data=ordered' in ext3" depends on EXT3_FS help - If a filesystem does not explicitly specify a data ordering - mode, and the journal capability allowed it, ext3 used to - historically default to 'data=ordered'. - - That was a rather unfortunate choice, because it leads to all - kinds of latency problems, and the 'data=writeback' mode is more - appropriate these days. - - You should probably always answer 'n' here, and if you really - want to use 'data=ordered' mode, set it in the filesystem itself - with 'tune2fs -o journal_data_ordered'. - - But if you really want to enable the legacy default, you can do - so by answering 'y' to this question. + The journal mode options for ext3 have different tradeoffs + between when data is guaranteed to be on disk and + performance. The use of "data=writeback" can cause + unwritten data to appear in files after an system crash or + power failure, which can be a security issue. However, + "data=ordered" mode can also result in major performance + problems, including seconds-long delays before an fsync() + call returns. For details, see: + + http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs + + If you have been historically happy with ext3's performance, + data=ordered mode will be a safe choice and you should + answer 'y' here. If you understand the reliability and data + privacy issues of data=writeback and are willing to make + that trade off, answer 'n'. config EXT3_FS_XATTR bool "Ext3 extended attributes" diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e167bae37ef..c9b0df376b5 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return error; } -static int +int ext3_check_acl(struct inode *inode, int mask) { struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); @@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int -ext3_permission(struct inode *inode, int mask) -{ - return generic_permission(inode, mask, ext3_check_acl); -} - /* * Initialize the ACLs of a new inode. Called from ext3_new_inode. * diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 07d15a3a596..597334626de 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size) #ifdef CONFIG_EXT3_FS_POSIX_ACL /* acl.c */ -extern int ext3_permission (struct inode *, int); +extern int ext3_check_acl (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); #else /* CONFIG_EXT3_FS_POSIX_ACL */ #include <linux/sched.h> -#define ext3_permission NULL +#define ext3_check_acl NULL static inline int ext3_acl_chmod(struct inode *inode) diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 3d724a95882..373fa90c796 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext3_get_blocks_handle(NULL, inode, blk, 1, - &map_bh, 0, 0); + err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0); if (err > 0) { pgoff_t index = map_bh.b_blocknr >> (PAGE_CACHE_SHIFT - inode->i_blkbits); diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5b49704b231..388bbdfa0b4 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp) return 0; } -static ssize_t -ext3_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_path.dentry->d_inode; - ssize_t ret; - int err; - - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - - /* - * Skip flushing if there was an error, or if nothing was written. - */ - if (ret <= 0) - return ret; - - /* - * If the inode is IS_SYNC, or is O_SYNC and we are doing data - * journalling then we need to make sure that we force the transaction - * to disk to keep all metadata uptodate synchronously. - */ - if (file->f_flags & O_SYNC) { - /* - * If we are non-data-journaled, then the dirty data has - * already been flushed to backing store by generic_osync_inode, - * and the inode has been flushed too if there have been any - * modifications other than mere timestamp updates. - * - * Open question --- do we care about flushing timestamps too - * if the inode is IS_SYNC? - */ - if (!ext3_should_journal_data(inode)) - return ret; - - goto force_commit; - } - - /* - * So we know that there has been no forced data flush. If the inode - * is marked IS_SYNC, we need to force one ourselves. - */ - if (!IS_SYNC(inode)) - return ret; - - /* - * Open question #2 --- should we force data to disk here too? If we - * don't, the only impact is that data=writeback filesystems won't - * flush data to disk automatically on IS_SYNC, only metadata (but - * historically, that is what ext2 has done.) - */ - -force_commit: - err = ext3_force_commit(inode->i_sb); - if (err) - return err; - return ret; -} - const struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = ext3_file_write, + .aio_write = generic_file_aio_write, .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, @@ -137,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = { .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif - .permission = ext3_permission, + .check_acl = ext3_check_acl, .fiemap = ext3_fiemap, }; diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d33634119e1..451d166bbe9 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -23,6 +23,7 @@ */ #include <linux/time.h> +#include <linux/blkdev.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/writeback.h> @@ -73,7 +74,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) } if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; + goto flush; /* * The VFS has written the file data. If the inode is unaltered @@ -85,7 +86,16 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) .nr_to_write = 0, /* sys_fsync did this */ }; ret = sync_inode(inode, &wbc); + goto out; } +flush: + /* + * In case we didn't commit a transaction, we have to flush + * disk caches manually so that data really is on persistent + * storage + */ + if (test_opt(inode->i_sb, BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); out: return ret; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5f51fed5c75..acf1b142332 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -172,10 +172,21 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) * so before we call here everything must be consistently dirtied against * this transaction. */ -static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) +static int truncate_restart_transaction(handle_t *handle, struct inode *inode) { + int ret; + jbd_debug(2, "restarting handle %p\n", handle); - return ext3_journal_restart(handle, blocks_for_truncate(inode)); + /* + * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle + * At this moment, get_block can be called only for blocks inside + * i_size since page cache has been already dropped and writes are + * blocked by i_mutex. So we can safely drop the truncate_mutex. + */ + mutex_unlock(&EXT3_I(inode)->truncate_mutex); + ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); + mutex_lock(&EXT3_I(inode)->truncate_mutex); + return ret; } /* @@ -788,7 +799,7 @@ err_out: int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize) + int create) { int err = -EIO; int offsets[4]; @@ -911,13 +922,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, if (!err) err = ext3_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - /* - * i_disksize growing is protected by truncate_mutex. Don't forget to - * protect it if you're about to implement concurrent - * ext3_get_block() -bzzz - */ - if (!err && extend_disksize && inode->i_size > ei->i_disksize) - ei->i_disksize = inode->i_size; mutex_unlock(&ei->truncate_mutex); if (err) goto cleanup; @@ -972,7 +976,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, } ret = ext3_get_blocks_handle(handle, inode, iblock, - max_blocks, bh_result, create, 0); + max_blocks, bh_result, create); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -1005,7 +1009,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); err = ext3_get_blocks_handle(handle, inode, block, 1, - &dummy, create, 1); + &dummy, create); /* * ext3_get_blocks_handle() returns number of blocks * mapped. 0 in case of a HOLE. @@ -1193,15 +1197,16 @@ write_begin_failed: * i_size_read because we hold i_mutex. * * Add inode to orphan list in case we crash before truncate - * finishes. + * finishes. Do this only if ext3_can_truncate() agrees so + * that orphan processing code is happy. */ - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); ext3_journal_stop(handle); unlock_page(page); page_cache_release(page); if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + ext3_truncate(inode); } if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; @@ -1287,7 +1292,7 @@ static int ext3_ordered_write_end(struct file *file, * There may be allocated blocks outside of i_size because * we failed to copy some data. Prepare for truncate. */ - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); ret2 = ext3_journal_stop(handle); if (!ret) @@ -1296,7 +1301,7 @@ static int ext3_ordered_write_end(struct file *file, page_cache_release(page); if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + ext3_truncate(inode); return ret ? ret : copied; } @@ -1315,14 +1320,14 @@ static int ext3_writeback_write_end(struct file *file, * There may be allocated blocks outside of i_size because * we failed to copy some data. Prepare for truncate. */ - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); ret = ext3_journal_stop(handle); unlock_page(page); page_cache_release(page); if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + ext3_truncate(inode); return ret ? ret : copied; } @@ -1358,7 +1363,7 @@ static int ext3_journalled_write_end(struct file *file, * There may be allocated blocks outside of i_size because * we failed to copy some data. Prepare for truncate. */ - if (pos + len > inode->i_size) + if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { @@ -1375,7 +1380,7 @@ static int ext3_journalled_write_end(struct file *file, page_cache_release(page); if (pos + len > inode->i_size) - vmtruncate(inode, inode->i_size); + ext3_truncate(inode); return ret ? ret : copied; } @@ -1825,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext3_writeback_aops = { @@ -1840,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext3_journalled_aops = { @@ -1854,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = { .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void ext3_set_aops(struct inode *inode) @@ -2078,7 +2086,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, ext3_journal_dirty_metadata(handle, bh); } ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); + truncate_restart_transaction(handle, inode); if (bh) { BUFFER_TRACE(bh, "retaking write access"); ext3_journal_get_write_access(handle, bh); @@ -2288,7 +2296,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, return; if (try_to_extend_transaction(handle, inode)) { ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); + truncate_restart_transaction(handle, inode); } ext3_free_blocks(handle, inode, nr, 1); @@ -2898,6 +2906,10 @@ static int ext3_do_update_inode(handle_t *handle, struct buffer_head *bh = iloc->bh; int err = 0, rc, block; +again: + /* we can't allow multiple procs in here at once, its a bit racey */ + lock_buffer(bh); + /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ if (ei->i_state & EXT3_STATE_NEW) @@ -2957,16 +2969,20 @@ static int ext3_do_update_inode(handle_t *handle, /* If this is the first large file * created, add a flag to the superblock. */ + unlock_buffer(bh); err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (err) goto out_brelse; + ext3_update_dynamic_rev(sb); EXT3_SET_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_LARGE_FILE); handle->h_sync = 1; err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + /* get our lock and start over */ + goto again; } } } @@ -2989,6 +3005,7 @@ static int ext3_do_update_inode(handle_t *handle, raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + unlock_buffer(bh); rc = ext3_journal_dirty_metadata(handle, bh); if (!err) err = rc; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 6ff7b973023..aad6400c9b7 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = { .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif - .permission = ext3_permission, + .check_acl = ext3_check_acl, }; const struct inode_operations ext3_special_inode_operations = { @@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = { .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif - .permission = ext3_permission, + .check_acl = ext3_check_acl, }; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 524b349c629..72743d36050 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl #endif } +static char *data_mode_string(unsigned long mode) +{ + switch (mode) { + case EXT3_MOUNT_JOURNAL_DATA: + return "journal"; + case EXT3_MOUNT_ORDERED_DATA: + return "ordered"; + case EXT3_MOUNT_WRITEBACK_DATA: + return "writeback"; + } + return "unknown"; +} + /* * Show an option if * - it's set to a non-default value OR @@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) - seq_puts(seq, ",data=journal"); - else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) - seq_puts(seq, ",data=ordered"); - else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) - seq_puts(seq, ",data=writeback"); - + seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt & + EXT3_MOUNT_DATA_FLAGS)); if (test_opt(sb, DATA_ERR_ABORT)) seq_puts(seq, ",data_err=abort"); @@ -712,7 +720,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); -static struct dquot_operations ext3_quota_operations = { +static const struct dquot_operations ext3_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, .alloc_space = dquot_alloc_space, @@ -729,7 +737,7 @@ static struct dquot_operations ext3_quota_operations = { .destroy_dquot = dquot_destroy, }; -static struct quotactl_ops ext3_qctl_operations = { +static const struct quotactl_ops ext3_qctl_operations = { .quota_on = ext3_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, @@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb, datacheck: if (is_remount) { if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) - != data_opt) { - printk(KERN_ERR - "EXT3-fs: cannot change data " - "mode on remount\n"); - return 0; - } + == data_opt) + break; + printk(KERN_ERR + "EXT3-fs (device %s): Cannot change " + "data mode on remount. The filesystem " + "is mounted in data=%s mode and you " + "try to remount it in data=%s mode.\n", + sb->s_id, + data_mode_string(sbi->s_mount_opt & + EXT3_MOUNT_DATA_FLAGS), + data_mode_string(data_opt)); + return 0; } else { sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; sbi->s_mount_opt |= data_opt; |