aboutsummaryrefslogtreecommitdiff
path: root/fs/jbd2
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-02-18 13:19:25 +1100
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-02-18 13:19:25 +1100
commit82a0a1cc8f94bc59e5919715bc03fc8353fa770d (patch)
treec25f19a0ae1f7cb75319b585c1d473cfed463f22 /fs/jbd2
parent8d30c14cab30d405a05f2aaceda1e9ad57800f36 (diff)
parent5955c7a2cfb6a35429adea5dc480002b15ca8cfc (diff)
Merge commit 'origin/master' into next
Manual merge of: arch/powerpc/include/asm/pgtable-ppc32.h
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jbd2/transaction.c42
2 files changed, 42 insertions, 17 deletions
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb343008ede..58144102bf2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
- * Called under j_state_lock. Returns true if a transaction was started.
+ * Called under j_state_lock. Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
/*
* Start a commit of the current running transaction (if any). Returns true
- * if a transaction was started, and fills its tid in at *ptid
+ * if a transaction is going to be committed (or is currently already
+ * committing), and fills its tid in at *ptid
*/
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid;
- ret = __jbd2_log_start_commit(journal, tid);
- if (ret && ptid)
+ __jbd2_log_start_commit(journal, tid);
+ /* There's a running transaction and we've just made sure
+ * it's commit has been scheduled. */
+ if (ptid)
*ptid = tid;
- } else if (journal->j_committing_transaction && ptid) {
+ ret = 1;
+ } else if (journal->j_committing_transaction) {
/*
* If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction
*/
- *ptid = journal->j_committing_transaction->t_tid;
+ if (ptid)
+ *ptid = journal->j_committing_transaction->t_tid;
ret = 1;
}
spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7..28ce21d8598 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2129,26 +2129,46 @@ done:
}
/*
- * This function must be called when inode is journaled in ordered mode
- * before truncation happens. It starts writeout of truncated part in
- * case it is in the committing transaction so that we stand to ordered
- * mode consistency guarantees.
+ * File truncate and transaction commit interact with each other in a
+ * non-trivial way. If a transaction writing data block A is
+ * committing, we cannot discard the data by truncate until we have
+ * written them. Otherwise if we crashed after the transaction with
+ * write has committed but before the transaction with truncate has
+ * committed, we could see stale data in block A. This function is a
+ * helper to solve this problem. It starts writeout of the truncated
+ * part in case it is in the committing transaction.
+ *
+ * Filesystem code must call this function when inode is journaled in
+ * ordered mode before truncation happens and after the inode has been
+ * placed on orphan list with the new inode size. The second condition
+ * avoids the race that someone writes new data and we start
+ * committing the transaction after this function has been called but
+ * before a transaction for truncate is started (and furthermore it
+ * allows us to optimize the case where the addition to orphan list
+ * happens in the same transaction as write --- we don't have to write
+ * any data in such case).
*/
-int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+int jbd2_journal_begin_ordered_truncate(journal_t *journal,
+ struct jbd2_inode *jinode,
loff_t new_size)
{
- journal_t *journal;
- transaction_t *commit_trans;
+ transaction_t *inode_trans, *commit_trans;
int ret = 0;
- if (!inode->i_transaction && !inode->i_next_transaction)
+ /* This is a quick check to avoid locking if not necessary */
+ if (!jinode->i_transaction)
goto out;
- journal = inode->i_transaction->t_journal;
+ /* Locks are here just to force reading of recent values, it is
+ * enough that the transaction was not committing before we started
+ * a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock);
- if (inode->i_transaction == commit_trans) {
- ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+ spin_lock(&journal->j_list_lock);
+ inode_trans = jinode->i_transaction;
+ spin_unlock(&journal->j_list_lock);
+ if (inode_trans == commit_trans) {
+ ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
new_size, LLONG_MAX);
if (ret)
jbd2_journal_abort(journal, ret);