aboutsummaryrefslogtreecommitdiff
path: root/fs/jbd
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-11-10 09:10:44 +0100
committerIngo Molnar <mingo@elte.hu>2008-11-10 09:10:44 +0100
commita5a64498c194c82ecad3a2d67cff6231cda8d3dd (patch)
tree723d5d81419f9960b8d30ed9a2ece8a58d6c4328 /fs/jbd
parentbb93d802ae5c1949977cc6da247b218240677f11 (diff)
parentf7160c7573615ec82c691e294cf80d920b5d588d (diff)
Merge commit 'v2.6.28-rc4' into timers/rtc
Conflicts: drivers/rtc/rtc-cmos.c
Diffstat (limited to 'fs/jbd')
-rw-r--r--fs/jbd/Kconfig30
-rw-r--r--fs/jbd/checkpoint.c89
-rw-r--r--fs/jbd/commit.c10
-rw-r--r--fs/jbd/journal.c28
-rw-r--r--fs/jbd/recovery.c7
-rw-r--r--fs/jbd/transaction.c17
6 files changed, 152 insertions, 29 deletions
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
new file mode 100644
index 00000000000..4e28beeed15
--- /dev/null
+++ b/fs/jbd/Kconfig
@@ -0,0 +1,30 @@
+config JBD
+ tristate
+ help
+ This is a generic journalling layer for block devices. It is
+ currently used by the ext3 file system, but it could also be
+ used to add journal support to other file systems or block
+ devices such as RAID or LVM.
+
+ If you are using the ext3 file system, you need to say Y here.
+ If you are not using ext3 then you will probably want to say N.
+
+ To compile this device as a module, choose M here: the module will be
+ called jbd. If you are compiling ext3 into the kernel, you
+ cannot compile this code as a module.
+
+config JBD_DEBUG
+ bool "JBD (ext3) debugging support"
+ depends on JBD && DEBUG_FS
+ help
+ If you are using the ext3 journaled file system (or potentially any
+ other file system/device using JBD), this option allows you to
+ enable debugging output while the system is running, in order to
+ help track down any problems you are having. By default the
+ debugging output will be turned off.
+
+ If you select Y here, then you will be able to turn on debugging
+ with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+ number between 1 and 5, the higher the number, the more debugging
+ output is generated. To turn debugging off again, do
+ "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index a5432bbbfb8..61f32f3868c 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
int ret = 0;
struct buffer_head *bh = jh2bh(jh);
- if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
+ if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
+ !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
@@ -114,7 +115,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
*/
void __log_wait_for_space(journal_t *journal)
{
- int nblocks;
+ int nblocks, space_left;
assert_spin_locked(&journal->j_state_lock);
nblocks = jbd_space_needed(journal);
@@ -126,14 +127,46 @@ void __log_wait_for_space(journal_t *journal)
/*
* Test again, another process may have checkpointed while we
- * were waiting for the checkpoint lock
+ * were waiting for the checkpoint lock. If there are no
+ * transactions ready to be checkpointed, try to recover
+ * journal space by calling cleanup_journal_tail(), and if
+ * that doesn't work, by waiting for the currently committing
+ * transaction to complete. If there is absolutely no way
+ * to make progress, this is either a BUG or corrupted
+ * filesystem, so abort the journal and leave a stack
+ * trace for forensic evidence.
*/
spin_lock(&journal->j_state_lock);
+ spin_lock(&journal->j_list_lock);
nblocks = jbd_space_needed(journal);
- if (__log_space_left(journal) < nblocks) {
+ space_left = __log_space_left(journal);
+ if (space_left < nblocks) {
+ int chkpt = journal->j_checkpoint_transactions != NULL;
+ tid_t tid = 0;
+
+ if (journal->j_committing_transaction)
+ tid = journal->j_committing_transaction->t_tid;
+ spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock);
- log_do_checkpoint(journal);
+ if (chkpt) {
+ log_do_checkpoint(journal);
+ } else if (cleanup_journal_tail(journal) == 0) {
+ /* We were able to recover space; yay! */
+ ;
+ } else if (tid) {
+ log_wait_commit(journal, tid);
+ } else {
+ printk(KERN_ERR "%s: needed %d blocks and "
+ "only had %d space available\n",
+ __func__, nblocks, space_left);
+ printk(KERN_ERR "%s: no way to get more "
+ "journal space\n", __func__);
+ WARN_ON(1);
+ journal_abort(journal, 0);
+ }
spin_lock(&journal->j_state_lock);
+ } else {
+ spin_unlock(&journal->j_list_lock);
}
mutex_unlock(&journal->j_checkpoint_mutex);
}
@@ -160,21 +193,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
* buffers. Note that we take the buffers in the opposite ordering
* from the one in which they were submitted for IO.
*
+ * Return 0 on success, and return <0 if some buffers have failed
+ * to be written out.
+ *
* Called with j_list_lock held.
*/
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
struct journal_head *jh;
struct buffer_head *bh;
tid_t this_tid;
int released = 0;
+ int ret = 0;
this_tid = transaction->t_tid;
restart:
/* Did somebody clean up the transaction in the meanwhile? */
if (journal->j_checkpoint_transactions != transaction ||
transaction->t_tid != this_tid)
- return;
+ return ret;
while (!released && transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
@@ -194,6 +231,9 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
+ if (unlikely(buffer_write_io_error(bh)))
+ ret = -EIO;
+
/*
* Now in whatever state the buffer currently is, we know that
* it has been written out and so we can drop it from the list
@@ -203,6 +243,8 @@ restart:
journal_remove_journal_head(bh);
__brelse(bh);
}
+
+ return ret;
}
#define NR_BATCH 64
@@ -226,7 +268,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Try to flush one buffer from the checkpoint list to disk.
*
* Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.
+ * scan of the checkpoint list. Return <0 if the buffer has failed to
+ * be written out.
*
* Called with j_list_lock held and drops it if 1 is returned
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -256,6 +299,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
log_wait_commit(journal, tid);
ret = 1;
} else if (!buffer_dirty(bh)) {
+ ret = 1;
+ if (unlikely(buffer_write_io_error(bh)))
+ ret = -EIO;
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
@@ -263,7 +309,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh);
- ret = 1;
} else {
/*
* Important: we are about to write the buffer, and
@@ -295,6 +340,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
* to disk. We submit larger chunks of data at once.
*
* The journal should be locked before calling this function.
+ * Called with j_checkpoint_mutex held.
*/
int log_do_checkpoint(journal_t *journal)
{
@@ -318,6 +364,7 @@ int log_do_checkpoint(journal_t *journal)
* OK, we need to start writing disk blocks. Take one transaction
* and write it.
*/
+ result = 0;
spin_lock(&journal->j_list_lock);
if (!journal->j_checkpoint_transactions)
goto out;
@@ -334,7 +381,7 @@ restart:
int batch_count = 0;
struct buffer_head *bhs[NR_BATCH];
struct journal_head *jh;
- int retry = 0;
+ int retry = 0, err;
while (!retry && transaction->t_checkpoint_list) {
struct buffer_head *bh;
@@ -347,6 +394,8 @@ restart:
break;
}
retry = __process_buffer(journal, jh, bhs,&batch_count);
+ if (retry < 0 && !result)
+ result = retry;
if (!retry && (need_resched() ||
spin_needbreak(&journal->j_list_lock))) {
spin_unlock(&journal->j_list_lock);
@@ -371,14 +420,18 @@ restart:
* Now we have cleaned up the first transaction's checkpoint
* list. Let's clean up the second one
*/
- __wait_cp_io(journal, transaction);
+ err = __wait_cp_io(journal, transaction);
+ if (!result)
+ result = err;
}
out:
spin_unlock(&journal->j_list_lock);
- result = cleanup_journal_tail(journal);
if (result < 0)
- return result;
- return 0;
+ journal_abort(journal, result);
+ else
+ result = cleanup_journal_tail(journal);
+
+ return (result < 0) ? result : 0;
}
/*
@@ -394,8 +447,9 @@ out:
* This is the only part of the journaling code which really needs to be
* aware of transaction aborts. Checkpointing involves writing to the
* main filesystem area rather than to the journal, so it can proceed
- * even in abort state, but we must not update the journal superblock if
- * we have an abort error outstanding.
+ * even in abort state, but we must not update the super block if
+ * checkpointing may have failed. Otherwise, we would lose some metadata
+ * buffers which should be written-back to the filesystem.
*/
int cleanup_journal_tail(journal_t *journal)
@@ -404,6 +458,9 @@ int cleanup_journal_tail(journal_t *journal)
tid_t first_tid;
unsigned long blocknr, freed;
+ if (is_journal_aborted(journal))
+ return 1;
+
/* OK, work out the oldest transaction remaining in the log, and
* the log block it starts at.
*
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ae08c057e75..25719d902c5 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal)
printk(KERN_WARNING
"JBD: Detected IO errors while flushing file data "
"on %s\n", bdevname(journal->j_fs_dev, b));
+ if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+ journal_abort(journal, err);
err = 0;
}
@@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
jh = commit_transaction->t_buffers;
/* If we're in abort mode, we just un-journal the buffer and
- release it for background writing. */
+ release it. */
if (is_journal_aborted(journal)) {
+ clear_buffer_jbddirty(jh2bh(jh));
JBUFFER_TRACE(jh, "journal is aborting: refile");
journal_refile_buffer(journal, jh);
/* If that was the last one, we need to clean up
@@ -762,6 +765,9 @@ wait_for_iobuf:
/* AKPM: bforget here */
}
+ if (err)
+ journal_abort(journal, err);
+
jbd_debug(3, "JBD: commit phase 6\n");
if (journal_write_commit_record(journal, commit_transaction))
@@ -852,6 +858,8 @@ restart_loop:
if (buffer_jbddirty(bh)) {
JBUFFER_TRACE(jh, "add to new checkpointing trans");
__journal_insert_checkpoint(jh, commit_transaction);
+ if (is_journal_aborted(journal))
+ clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index aa7143a8349..9e4fa52d7dc 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1121,9 +1121,12 @@ recovery_error:
*
* Release a journal_t structure once it is no longer in use by the
* journaled object.
+ * Return <0 if we couldn't clean up the journal.
*/
-void journal_destroy(journal_t *journal)
+int journal_destroy(journal_t *journal)
{
+ int err = 0;
+
/* Wait for the commit thread to wake up and die. */
journal_kill_thread(journal);
@@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal)
J_ASSERT(journal->j_checkpoint_transactions == NULL);
spin_unlock(&journal->j_list_lock);
- /* We can now mark the journal as empty. */
- journal->j_tail = 0;
- journal->j_tail_sequence = ++journal->j_transaction_sequence;
if (journal->j_sb_buffer) {
- journal_update_superblock(journal, 1);
+ if (!is_journal_aborted(journal)) {
+ /* We can now mark the journal as empty. */
+ journal->j_tail = 0;
+ journal->j_tail_sequence =
+ ++journal->j_transaction_sequence;
+ journal_update_superblock(journal, 1);
+ } else {
+ err = -EIO;
+ }
brelse(journal->j_sb_buffer);
}
@@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal)
journal_destroy_revoke(journal);
kfree(journal->j_wbuf);
kfree(journal);
+
+ return err;
}
@@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal)
spin_lock(&journal->j_list_lock);
while (!err && journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock);
+ mutex_lock(&journal->j_checkpoint_mutex);
err = log_do_checkpoint(journal);
+ mutex_unlock(&journal->j_checkpoint_mutex);
spin_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
+
+ if (is_journal_aborted(journal))
+ return -EIO;
+
cleanup_journal_tail(journal);
/* Finally, mark the journal as really needing no recovery.
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal)
J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
spin_unlock(&journal->j_state_lock);
- return err;
+ return 0;
}
/**
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 43bc5e5ed06..db5e982c5dd 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -223,7 +223,7 @@ do { \
*/
int journal_recover(journal_t *journal)
{
- int err;
+ int err, err2;
journal_superblock_t * sb;
struct recovery_info info;
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
journal->j_transaction_sequence = ++info.end_transaction;
journal_clear_revoke(journal);
- sync_blockdev(journal->j_fs_dev);
+ err2 = sync_blockdev(journal->j_fs_dev);
+ if (!err)
+ err = err2;
+
return err;
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 0540ca27a44..60d4c32c880 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -860,7 +860,6 @@ out:
* int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
- * @credits: store the number of taken credits here (if not NULL)
*
* Sometimes there is a need to distinguish between metadata which has
* been committed to disk and that which has not. The ext3fs code uses
@@ -954,9 +953,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
journal_t *journal = handle->h_transaction->t_journal;
int need_brelse = 0;
struct journal_head *jh;
+ int ret = 0;
if (is_handle_aborted(handle))
- return 0;
+ return ret;
jh = journal_add_journal_head(bh);
JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1067,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
time if it is redirtied */
}
- /* journal_clean_data_list() may have got there first */
+ /*
+ * We cannot remove the buffer with io error from the
+ * committing transaction, because otherwise it would
+ * miss the error and the commit would not abort.
+ */
+ if (unlikely(!buffer_uptodate(bh))) {
+ ret = -EIO;
+ goto no_journal;
+ }
+
if (jh->b_transaction != NULL) {
JBUFFER_TRACE(jh, "unfile from commit");
__journal_temp_unlink_buffer(jh);
@@ -1108,7 +1117,7 @@ no_journal:
}
JBUFFER_TRACE(jh, "exit");
journal_put_journal_head(jh);
- return 0;
+ return ret;
}
/**