aboutsummaryrefslogtreecommitdiff
path: root/fs/jbd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd')
-rw-r--r--fs/jbd/commit.c64
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd/revoke.c163
-rw-r--r--fs/jbd/transaction.c57
4 files changed, 181 insertions, 111 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498c..2eccbfaa1d4 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
/*
* When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
* After the transaction commits, these pages are left on the LRU, with no
* ->mapping, and with attached buffers. These pages are trivially reclaimable
* by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
* So here, we have a buffer which has just come off the forget list. Look to
* see if we can strip all buffers from the backing page.
*
- * Called under lock_journal(), and possibly under journal_datalist_lock. The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock. The caller provided us with a ref
+ * against the buffer, and we drop that here.
*/
static void release_buffer_page(struct buffer_head *bh)
{
@@ -78,6 +78,19 @@ nope:
}
/*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+ if (buffer_freed(bh)) {
+ clear_buffer_freed(bh);
+ release_buffer_page(bh);
+ } else
+ put_bh(bh);
+}
+
+/*
* Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
* held. For ranking reasons we must trylock. If we lose, schedule away and
* return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
/*
* Submit all the data buffers to disk
*/
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
transaction_t *commit_transaction)
{
struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
int locked;
int bufs = 0;
struct buffer_head **wbuf = journal->j_wbuf;
+ int err = 0;
/*
* Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@ write_out_data:
if (locked)
unlock_buffer(bh);
BUFFER_TRACE(bh, "already cleaned up");
- put_bh(bh);
+ release_data_buffer(bh);
continue;
}
if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
put_bh(bh);
} else {
BUFFER_TRACE(bh, "writeout complete: unfile");
+ if (unlikely(!buffer_uptodate(bh)))
+ err = -EIO;
__journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
journal_remove_journal_head(bh);
- /* Once for our safety reference, once for
+ /* One for our safety reference, other for
* journal_remove_journal_head() */
put_bh(bh);
- put_bh(bh);
+ release_data_buffer(bh);
}
if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
}
spin_unlock(&journal->j_list_lock);
journal_do_submit_data(wbuf, bufs);
+
+ return err;
}
/*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
- err = 0;
- journal_submit_data_buffers(journal, commit_transaction);
+ err = journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- err = -EIO;
spin_lock(&journal->j_list_lock);
}
+ if (unlikely(!buffer_uptodate(bh))) {
+ if (TestSetPageLocked(bh->b_page)) {
+ spin_unlock(&journal->j_list_lock);
+ lock_page(bh->b_page);
+ spin_lock(&journal->j_list_lock);
+ }
+ if (bh->b_page->mapping)
+ set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+ unlock_page(bh->b_page);
+ SetPageError(bh->b_page);
+ err = -EIO;
+ }
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
} else {
jbd_unlock_bh_state(bh);
}
- put_bh(bh);
+ release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
- if (err)
- journal_abort(journal, err);
+ if (err) {
+ char b[BDEVNAME_SIZE];
- journal_write_revoke_records(journal, commit_transaction);
+ printk(KERN_WARNING
+ "JBD: Detected IO errors while flushing file data "
+ "on %s\n", bdevname(journal->j_fs_dev, b));
+ err = 0;
+ }
- jbd_debug(3, "JBD: commit phase 2\n");
+ journal_write_revoke_records(journal, commit_transaction);
/*
* If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c..aa7143a8349 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
EXPORT_SYMBOL(journal_create);
EXPORT_SYMBOL(journal_load);
EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
EXPORT_SYMBOL(journal_abort);
EXPORT_SYMBOL(journal_errno);
EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
static void journal_destroy_journal_head_cache(void)
{
- J_ASSERT(journal_head_cache != NULL);
- kmem_cache_destroy(journal_head_cache);
- journal_head_cache = NULL;
+ if (journal_head_cache) {
+ kmem_cache_destroy(journal_head_cache);
+ journal_head_cache = NULL;
+ }
}
/*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4..c7bd649bbbd 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
return NULL;
}
+void journal_destroy_revoke_caches(void)
+{
+ if (revoke_record_cache) {
+ kmem_cache_destroy(revoke_record_cache);
+ revoke_record_cache = NULL;
+ }
+ if (revoke_table_cache) {
+ kmem_cache_destroy(revoke_table_cache);
+ revoke_table_cache = NULL;
+ }
+}
+
int __init journal_init_revoke_caches(void)
{
+ J_ASSERT(!revoke_record_cache);
+ J_ASSERT(!revoke_table_cache);
+
revoke_record_cache = kmem_cache_create("revoke_record",
sizeof(struct jbd_revoke_record_s),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (!revoke_record_cache)
- return -ENOMEM;
+ goto record_cache_failure;
revoke_table_cache = kmem_cache_create("revoke_table",
sizeof(struct jbd_revoke_table_s),
0, SLAB_TEMPORARY, NULL);
- if (!revoke_table_cache) {
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- return -ENOMEM;
- }
+ if (!revoke_table_cache)
+ goto table_cache_failure;
+
return 0;
-}
-void journal_destroy_revoke_caches(void)
-{
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- kmem_cache_destroy(revoke_table_cache);
- revoke_table_cache = NULL;
+table_cache_failure:
+ journal_destroy_revoke_caches();
+record_cache_failure:
+ return -ENOMEM;
}
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
{
- int shift, tmp;
+ int shift = 0;
+ int tmp = hash_size;
+ struct jbd_revoke_table_s *table;
- J_ASSERT (journal->j_revoke_table[0] == NULL);
+ table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+ if (!table)
+ goto out;
- shift = 0;
- tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
- journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[0])
- return -ENOMEM;
- journal->j_revoke = journal->j_revoke_table[0];
-
- /* Check that the hash_size is a power of two */
- J_ASSERT(is_power_of_2(hash_size));
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
-
- journal->j_revoke->hash_table =
+ table->hash_size = hash_size;
+ table->hash_shift = shift;
+ table->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- journal->j_revoke = NULL;
- return -ENOMEM;
+ if (!table->hash_table) {
+ kmem_cache_free(revoke_table_cache, table);
+ table = NULL;
+ goto out;
}
for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_LIST_HEAD(&table->hash_table[tmp]);
- journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[1]) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- return -ENOMEM;
+out:
+ return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+ int i;
+ struct list_head *hash_list;
+
+ for (i = 0; i < table->hash_size; i++) {
+ hash_list = &table->hash_table[i];
+ J_ASSERT(list_empty(hash_list));
}
- journal->j_revoke = journal->j_revoke_table[1];
+ kfree(table->hash_table);
+ kmem_cache_free(revoke_table_cache, table);
+}
- /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+ J_ASSERT(journal->j_revoke_table[0] == NULL);
J_ASSERT(is_power_of_2(hash_size));
- journal->j_revoke->hash_size = hash_size;
+ journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[0])
+ goto fail0;
- journal->j_revoke->hash_shift = shift;
+ journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[1])
+ goto fail1;
- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
- journal->j_revoke = NULL;
- return -ENOMEM;
- }
-
- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ journal->j_revoke = journal->j_revoke_table[1];
spin_lock_init(&journal->j_revoke_lock);
return 0;
-}
-/* Destoy a journal's revoke table. The table must already be empty! */
+fail1:
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+ return -ENOMEM;
+}
+/* Destroy a journal's revoke table. The table must already be empty! */
void journal_destroy_revoke(journal_t *journal)
{
- struct jbd_revoke_table_s *table;
- struct list_head *hash_list;
- int i;
-
- table = journal->j_revoke_table[0];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
- journal->j_revoke = NULL;
-
- table = journal->j_revoke_table[1];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
journal->j_revoke = NULL;
+ if (journal->j_revoke_table[0])
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+ if (journal->j_revoke_table[1])
+ journal_destroy_revoke_table(journal->j_revoke_table[1]);
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23..8dee3200750 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
return;
}
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold the a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * tryinf to free that buffer.
+ *
+ * Called with journal->j_state_lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+ transaction_t *transaction = NULL;
+ tid_t tid;
+
+ spin_lock(&journal->j_state_lock);
+ transaction = journal->j_committing_transaction;
+
+ if (!transaction) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
+
+ tid = transaction->t_tid;
+ spin_unlock(&journal->j_state_lock);
+ log_wait_commit(journal, tid);
+}
/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
*
*
* For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
* journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
+ *
+ * Return 0 on failure, 1 on success
*/
int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t unused_gfp_mask)
+ struct page *page, gfp_t gfp_mask)
{
struct buffer_head *head;
struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
+
ret = try_to_free_buffers(page);
+
+ /*
+ * There are a number of places where journal_try_to_free_buffers()
+ * could race with journal_commit_transaction(), the later still
+ * holds the reference to the buffers to free while processing them.
+ * try_to_free_buffers() failed to free those buffers. Some of the
+ * caller of releasepage() request page buffers to be dropped, otherwise
+ * treat the fail-to-free as errors (such as generic_file_direct_IO())
+ *
+ * So, if the caller of try_to_release_page() wants the synchronous
+ * behaviour(i.e make sure buffers are dropped upon return),
+ * let's wait for the current transaction to finish flush of
+ * dirty data buffers, then try to free those buffers again,
+ * with the journal locked.
+ */
+ if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+ journal_wait_for_transaction_sync_data(journal);
+ ret = try_to_free_buffers(page);
+ }
+
busy:
return ret;
}