From 77e841de8abac4755cc83ca224fdf71418d65380 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Sun, 12 Oct 2008 16:39:16 -0400 Subject: jbd2: abort when failed to log metadata buffers If we failed to write metadata buffers to the journal space and succeeded to write the commit record, stale data can be written back to the filesystem as metadata in the recovery phase. To avoid this, when we failed to write out metadata buffers, abort the journal before writing the commit record. We can also avoid this kind of corruption by using the journal checksum feature because it can detect invalid metadata blocks in the journal and avoid them from being replayed. So we don't need to care about asynchronous commit record writeout with a checksum. Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0d3814a35ed..78e4da93412 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -783,6 +783,9 @@ wait_for_iobuf: /* AKPM: bforget here */ } + if (err) + jbd2_journal_abort(journal, err); + jbd_debug(3, "JBD: commit phase 5\n"); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, -- cgit v1.2.3 From 44519faf22ad6ce924ad0352d3dc200d9e0b66e8 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Fri, 10 Oct 2008 20:29:13 -0400 Subject: jbd2: fix error handling for checkpoint io When a checkpointing IO fails, current JBD2 code doesn't check the error and continue journaling. This means latest metadata can be lost from both the journal and filesystem. This patch leaves the failed metadata blocks in the journal space and aborts journaling in the case of jbd2_log_do_checkpoint(). To achieve this, we need to do: 1. don't remove the failed buffer from the checkpoint list where in the case of __try_to_free_cp_buf() because it may be released or overwritten by a later transaction 2. jbd2_log_do_checkpoint() is the last chance, remove the failed buffer from the checkpoint list and abort the journal 3. when checkpointing fails, don't update the journal super block to prevent the journaled contents from being cleaned. For safety, don't update j_tail and j_tail_sequence either 4. when checkpointing fails, notify this error to the ext4 layer so that ext4 don't clear the needs_recovery flag, otherwise the journaled contents are ignored and cleaned in the recovery phase 5. if the recovery fails, keep the needs_recovery flag 6. prevent jbd2_cleanup_journal_tail() from being called between __jbd2_journal_drop_transaction() and jbd2_journal_abort() (a possible race issue between jbd2_log_do_checkpoint()s called by jbd2_journal_flush() and __jbd2_log_wait_for_space()) Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- fs/jbd2/checkpoint.c | 49 +++++++++++++++++++++++++++++++++++++------------ fs/jbd2/journal.c | 28 ++++++++++++++++++++++------ fs/jbd2/recovery.c | 7 +++++-- include/linux/jbd2.h | 2 +- 4 files changed, 65 insertions(+), 21 deletions(-) diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 42895d36945..9203c3332f1 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -94,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) int ret = 0; struct buffer_head *bh = jh2bh(jh); - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { + if (jh->b_jlist == BJ_None && !buffer_locked(bh) && + !buffer_dirty(bh) && !buffer_write_io_error(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); @@ -176,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) * buffers. Note that we take the buffers in the opposite ordering * from the one in which they were submitted for IO. * + * Return 0 on success, and return <0 if some buffers have failed + * to be written out. + * * Called with j_list_lock held. */ -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +static int __wait_cp_io(journal_t *journal, transaction_t *transaction) { struct journal_head *jh; struct buffer_head *bh; tid_t this_tid; int released = 0; + int ret = 0; this_tid = transaction->t_tid; restart: /* Did somebody clean up the transaction in the meanwhile? */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) - return; + return ret; while (!released && transaction->t_checkpoint_io_list) { jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); @@ -210,6 +215,9 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + if (unlikely(buffer_write_io_error(bh))) + ret = -EIO; + /* * Now in whatever state the buffer currently is, we know that * it has been written out and so we can drop it from the list @@ -219,6 +227,8 @@ restart: jbd2_journal_remove_journal_head(bh); __brelse(bh); } + + return ret; } #define NR_BATCH 64 @@ -242,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. + * scan of the checkpoint list. Return <0 if the buffer has failed to + * be written out. * * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it @@ -274,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { + ret = 1; + if (unlikely(buffer_write_io_error(bh))) + ret = -EIO; J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); @@ -281,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, jbd_unlock_bh_state(bh); jbd2_journal_remove_journal_head(bh); __brelse(bh); - ret = 1; } else { /* * Important: we are about to write the buffer, and @@ -314,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * to disk. We submit larger chunks of data at once. * * The journal should be locked before calling this function. + * Called with j_checkpoint_mutex held. */ int jbd2_log_do_checkpoint(journal_t *journal) { @@ -339,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) * OK, we need to start writing disk blocks. Take one transaction * and write it. */ + result = 0; spin_lock(&journal->j_list_lock); if (!journal->j_checkpoint_transactions) goto out; @@ -357,7 +372,7 @@ restart: int batch_count = 0; struct buffer_head *bhs[NR_BATCH]; struct journal_head *jh; - int retry = 0; + int retry = 0, err; while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; @@ -371,6 +386,8 @@ restart: } retry = __process_buffer(journal, jh, bhs, &batch_count, transaction); + if (retry < 0 && !result) + result = retry; if (!retry && (need_resched() || spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); @@ -395,14 +412,18 @@ restart: * Now we have cleaned up the first transaction's checkpoint * list. Let's clean up the second one */ - __wait_cp_io(journal, transaction); + err = __wait_cp_io(journal, transaction); + if (!result) + result = err; } out: spin_unlock(&journal->j_list_lock); - result = jbd2_cleanup_journal_tail(journal); if (result < 0) - return result; - return 0; + jbd2_journal_abort(journal, result); + else + result = jbd2_cleanup_journal_tail(journal); + + return (result < 0) ? result : 0; } /* @@ -418,8 +439,9 @@ out: * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed - * even in abort state, but we must not update the journal superblock if - * we have an abort error outstanding. + * even in abort state, but we must not update the super block if + * checkpointing may have failed. Otherwise, we would lose some metadata + * buffers which should be written-back to the filesystem. */ int jbd2_cleanup_journal_tail(journal_t *journal) @@ -428,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) tid_t first_tid; unsigned long blocknr, freed; + if (is_journal_aborted(journal)) + return 1; + /* OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 01c3901c3a0..783de118de9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1451,9 +1451,12 @@ recovery_error: * * Release a journal_t structure once it is no longer in use by the * journaled object. + * Return <0 if we couldn't clean up the journal. */ -void jbd2_journal_destroy(journal_t *journal) +int jbd2_journal_destroy(journal_t *journal) { + int err = 0; + /* Wait for the commit thread to wake up and die. */ journal_kill_thread(journal); @@ -1476,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal) J_ASSERT(journal->j_checkpoint_transactions == NULL); spin_unlock(&journal->j_list_lock); - /* We can now mark the journal as empty. */ - journal->j_tail = 0; - journal->j_tail_sequence = ++journal->j_transaction_sequence; if (journal->j_sb_buffer) { - jbd2_journal_update_superblock(journal, 1); + if (!is_journal_aborted(journal)) { + /* We can now mark the journal as empty. */ + journal->j_tail = 0; + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + jbd2_journal_update_superblock(journal, 1); + } else { + err = -EIO; + } brelse(journal->j_sb_buffer); } @@ -1492,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal) jbd2_journal_destroy_revoke(journal); kfree(journal->j_wbuf); kfree(journal); + + return err; } @@ -1717,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); err = jbd2_log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); + + if (is_journal_aborted(journal)) + return -EIO; + jbd2_cleanup_journal_tail(journal); /* Finally, mark the journal as really needing no recovery. @@ -1742,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); spin_unlock(&journal->j_state_lock); - return err; + return 0; } /** diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 058f50f65b7..73063285b13 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -225,7 +225,7 @@ do { \ */ int jbd2_journal_recover(journal_t *journal) { - int err; + int err, err2; journal_superblock_t * sb; struct recovery_info info; @@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal) journal->j_transaction_sequence = ++info.end_transaction; jbd2_journal_clear_revoke(journal); - sync_blockdev(journal->j_fs_dev); + err2 = sync_blockdev(journal->j_fs_dev); + if (!err) + err = err2; + return err; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 66c3499478b..c9e7d781db3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1060,7 +1060,7 @@ extern void jbd2_journal_clear_features (journal_t *, unsigned long, unsigned long, unsigned long); extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_load (journal_t *journal); -extern void jbd2_journal_destroy (journal_t *); +extern int jbd2_journal_destroy (journal_t *); extern int jbd2_journal_recover (journal_t *journal); extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); -- cgit v1.2.3 From 7ffe1ea8949c75ecffb7a4d988bb881a9fa62fbe Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Fri, 10 Oct 2008 20:29:21 -0400 Subject: ext4: add checks for errors from jbd2 If the journal has aborted due to a checkpointing failure, we have to keep the contents of the journal space. Otherwise, the filesystem will lose uncheckpointed metadata completely and become inconsistent. To avoid this, we need to keep needs_recovery flag if checkpoint has failed. With this patch, ext4_put_super() detects a checkpointing failure from the return value of journal_destroy(), then it invokes ext4_abort() to make the filesystem read only and keep needs_recovery flag. Errors from jbd2_journal_flush() are also handled by this patch in some places. Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 12 ++++++++---- fs/ext4/super.c | 23 +++++++++++++++++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ea27eaa0cfe..dc99b4776d5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -192,7 +192,7 @@ setversion_out: case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; - int err; + int err, err2; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -206,8 +206,10 @@ setversion_out: err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - jbd2_journal_flush(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (err == 0) + err = err2; mnt_drop_write(filp->f_path.mnt); return err; @@ -215,7 +217,7 @@ setversion_out: case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; struct super_block *sb = inode->i_sb; - int err; + int err, err2; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -230,8 +232,10 @@ setversion_out: err = ext4_group_add(sb, &input); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - jbd2_journal_flush(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (err == 0) + err = err2; mnt_drop_write(filp->f_path.mnt); return err; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0e661c56966..79bd3989e84 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -507,7 +507,8 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); - jbd2_journal_destroy(sbi->s_journal); + if (jbd2_journal_destroy(sbi->s_journal) < 0) + ext4_abort(sb, __func__, "Couldn't clean up the journal"); sbi->s_journal = NULL; if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); @@ -2853,7 +2854,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, journal_t *journal = EXT4_SB(sb)->s_journal; jbd2_journal_lock_updates(journal); - jbd2_journal_flush(journal); + if (jbd2_journal_flush(journal) < 0) + goto out; + lock_super(sb); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { @@ -2862,6 +2865,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb, ext4_commit_super(sb, es, 1); } unlock_super(sb); + +out: jbd2_journal_unlock_updates(journal); } @@ -2962,7 +2967,13 @@ static void ext4_write_super_lockfs(struct super_block *sb) /* Now we set up the journal barrier. */ jbd2_journal_lock_updates(journal); - jbd2_journal_flush(journal); + + /* + * We don't want to clear needs_recovery flag when we failed + * to flush the journal. + */ + if (jbd2_journal_flush(journal) < 0) + return; /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); @@ -3402,8 +3413,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, * otherwise be livelocked... */ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); - jbd2_journal_flush(EXT4_SB(sb)->s_journal); + err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (err) { + path_put(&nd.path); + return err; + } } err = vfs_quota_on_path(sb, type, format_id, &nd.path); -- cgit v1.2.3 From 7ad7445f60fe4d46c4c9d2a9463db180d2a3b270 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Fri, 10 Oct 2008 20:29:31 -0400 Subject: jbd2: don't dirty original metadata buffer on abort Currently, original metadata buffers are dirtied when they are unfiled whether the journal has aborted or not. Eventually these buffers will be written-back to the filesystem by pdflush. This means some metadata buffers are written to the filesystem without journaling if the journal aborts. So if both journal abort and system crash happen at the same time, the filesystem would become inconsistent state. Additionally, replaying journaled metadata can overwrite the latest metadata on the filesystem partly. Because, if the journal gets aborted, journaled metadata are preserved and replayed during the next mount not to lose uncheckpointed metadata. This would also break the consistency of the filesystem. This patch prevents original metadata buffers from being dirtied on abort by clearing BH_JBDDirty flag from those buffers. Thus, no metadata buffers are written to the filesystem without journaling. Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- fs/jbd2/commit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 78e4da93412..849f10496ce 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -504,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) jh = commit_transaction->t_buffers; /* If we're in abort mode, we just un-journal the buffer and - release it for background writing. */ + release it. */ if (is_journal_aborted(journal)) { + clear_buffer_jbddirty(jh2bh(jh)); JBUFFER_TRACE(jh, "journal is aborting: refile"); jbd2_journal_refile_buffer(journal, jh); /* If that was the last one, we need to clean up @@ -884,6 +885,8 @@ restart_loop: if (buffer_jbddirty(bh)) { JBUFFER_TRACE(jh, "add to new checkpointing trans"); __jbd2_journal_insert_checkpoint(jh, commit_transaction); + if (is_journal_aborted(journal)) + clear_buffer_jbddirty(bh); JBUFFER_TRACE(jh, "refile for checkpoint writeback"); __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); -- cgit v1.2.3 From 5bf5683a33f3584da6eced480967c4f7e11515a8 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Fri, 10 Oct 2008 22:12:43 -0400 Subject: ext4: add an option to control error handling on file data If the journal doesn't abort when it gets an IO error in file data blocks, the file data corruption will spread silently. Because most of applications and commands do buffered writes without fsync(), they don't notice the IO error. It's scary for mission critical systems. On the other hand, if the journal aborts whenever it gets an IO error in file data blocks, the system will easily become inoperable. So this patch introduces a filesystem option to determine whether it aborts the journal or just call printk() when it gets an IO error in file data. If you mount an ext4 fs with data_err=abort option, it aborts on file data write error. If you mount it with data_err=ignore, it doesn't abort, just call printk(). data_err=ignore is the default. Here is the corresponding patch of the ext3 version: http://kerneltrap.org/mailarchive/linux-kernel/2008/9/9/3239374 Signed-off-by: Hidehiro Kawai Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4.txt | 5 +++++ fs/ext4/ext4.h | 2 ++ fs/ext4/super.c | 16 ++++++++++++++++ fs/jbd2/commit.c | 2 ++ include/linux/jbd2.h | 3 +++ 5 files changed, 28 insertions(+) diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 74484e69640..eb154ef36c2 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -223,6 +223,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error. errors=continue Keep going on a filesystem error. errors=panic Panic and halt the machine if an error occurs. +data_err=ignore(*) Just print an error message if an error occurs + in a file data buffer in ordered mode. +data_err=abort Abort the journal if an error occurs in a file + data buffer in ordered mode. + grpid Give objects the same group ID as their creator. bsdgroups diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f46a513a515..6690a41cdd9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -540,6 +540,8 @@ do { \ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ +#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ + /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 79bd3989e84..014677b8e22 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -778,6 +778,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",inode_readahead_blks=%u", sbi->s_inode_readahead_blks); + if (test_opt(sb, DATA_ERR_ABORT)) + seq_puts(seq, ",data_err=abort"); + ext4_show_quota_options(seq, sb); return 0; } @@ -907,6 +910,7 @@ enum { Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, @@ -953,6 +957,8 @@ static match_table_t tokens = { {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_data_err_abort, "data_err=abort"}, + {Opt_data_err_ignore, "data_err=ignore"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1187,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb, sbi->s_mount_opt |= data_opt; } break; + case Opt_data_err_abort: + set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); + break; + case Opt_data_err_ignore: + clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); + break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -2535,6 +2547,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_flags |= JBD2_BARRIER; else journal->j_flags &= ~JBD2_BARRIER; + if (test_opt(sb, DATA_ERR_ABORT)) + journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; + else + journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; spin_unlock(&journal->j_state_lock); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 849f10496ce..0abe02c4242 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -684,6 +684,8 @@ start_journal_io: printk(KERN_WARNING "JBD2: Detected IO errors while flushing file data " "on %s\n", journal->j_devname); + if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) + jbd2_journal_abort(journal, err); err = 0; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index c9e7d781db3..d2e91ea998f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -967,6 +967,9 @@ struct journal_s #define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ #define JBD2_BARRIER 0x020 /* Use IDE barriers */ +#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file + * data write error in ordered + * mode */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3 From 3244fcb1ae03362e4aa8cb1a9039fbfd61661859 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Sun, 12 Oct 2008 17:27:49 -0400 Subject: ext4: fix build failure without procfs fs/ext4/super.c: In function 'ext4_fill_super': fs/ext4/super.c:2226: error: 'ext4_ui_proc_fops' undeclared (first use in this function) fs/ext4/super.c:2226: error: (Each undeclared identifier is reported only once fs/ext4/super.c:2226: error: for each function it appears in.) Signed-off-by: Alexander Beregalov Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 014677b8e22..fb940c22ab0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2231,6 +2231,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } +#ifdef CONFIG_PROC_FS if (ext4_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); @@ -2238,6 +2239,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, &ext4_ui_proc_fops, &sbi->s_inode_readahead_blks); +#endif bgl_lock_init(&sbi->s_blockgroup_lock); -- cgit v1.2.3 From f319fb8bf6899e08bdb8d1e09a4e7a129dfa2312 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 12 Oct 2008 15:53:01 -0400 Subject: ext4: fix kconfig typo and extra whitespace Signed-off-by: Jan Engelhardt Signed-off-by: Theodore Ts'o --- fs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 40183d94b68..f54a157a029 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -170,8 +170,8 @@ config EXT4DEV_COMPAT help Starting with 2.6.28, the name of the ext4 filesystem was renamed from ext4dev to ext4. Unfortunately there are some - lagecy userspace programs (such as klibc's fstype) have - "ext4dev" hardcoded. + legacy userspace programs (such as klibc's fstype) have + "ext4dev" hardcoded. To enable backwards compatibility so that systems that are still expecting to mount ext4 filesystems using ext4dev, -- cgit v1.2.3