aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/inode.c115
-rw-r--r--fs/ext4/mballoc.c164
-rw-r--r--fs/ext4/migrate.c123
-rw-r--r--fs/ext4/namei.c1
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/jbd/commit.c14
-rw-r--r--fs/jbd2/commit.c10
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--include/linux/ext4_fs.h7
9 files changed, 270 insertions, 177 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f4e38745224..7dd9b50d5eb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -892,7 +892,16 @@ out:
return err;
}
-#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+/*
+ * Number of credits we need for writing DIO_MAX_BLOCKS:
+ * We need sb + group descriptor + bitmap + inode -> 4
+ * For B blocks with A block pointers per block we need:
+ * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
+ * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
+ */
+#define DIO_CREDITS 25
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
@@ -939,49 +948,31 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
handle_t *handle = ext4_journal_current_handle();
- int ret = 0;
+ int ret = 0, started = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
- if (!create)
- goto get_block; /* A read */
-
- if (max_blocks == 1)
- goto get_block; /* A single block get */
-
- if (handle->h_transaction->t_state == T_LOCKED) {
- /*
- * Huge direct-io writes can hold off commits for long
- * periods of time. Let this commit run.
- */
- ext4_journal_stop(handle);
- handle = ext4_journal_start(inode, DIO_CREDITS);
- if (IS_ERR(handle))
+ if (create && !handle) {
+ /* Direct IO write... */
+ if (max_blocks > DIO_MAX_BLOCKS)
+ max_blocks = DIO_MAX_BLOCKS;
+ handle = ext4_journal_start(inode, DIO_CREDITS +
+ 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto get_block;
- }
-
- if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) {
- /*
- * Getting low on buffer credits...
- */
- ret = ext4_journal_extend(handle, DIO_CREDITS);
- if (ret > 0) {
- /*
- * Couldn't extend the transaction. Start a new one.
- */
- ret = ext4_journal_restart(handle, DIO_CREDITS);
+ goto out;
}
+ started = 1;
}
-get_block:
- if (ret == 0) {
- ret = ext4_get_blocks_wrap(handle, inode, iblock,
+ ret = ext4_get_blocks_wrap(handle, inode, iblock,
max_blocks, bh_result, create, 0);
- if (ret > 0) {
- bh_result->b_size = (ret << inode->i_blkbits);
- ret = 0;
- }
+ if (ret > 0) {
+ bh_result->b_size = (ret << inode->i_blkbits);
+ ret = 0;
}
+ if (started)
+ ext4_journal_stop(handle);
+out:
return ret;
}
@@ -1671,7 +1662,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
* if the machine crashes during the write.
*
* If the O_DIRECT write is intantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file.
+ * crashes then stale disk data _may_ be exposed inside the file. But current
+ * VFS code falls back into buffered path in that case so we are safe.
*/
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
@@ -1680,7 +1672,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
- handle_t *handle = NULL;
+ handle_t *handle;
ssize_t ret;
int orphan = 0;
size_t count = iov_length(iov, nr_segs);
@@ -1688,17 +1680,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
if (rw == WRITE) {
loff_t final_size = offset + count;
- handle = ext4_journal_start(inode, DIO_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
if (final_size > inode->i_size) {
+ /* Credits for sb + inode write */
+ handle = ext4_journal_start(inode, 2);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
ret = ext4_orphan_add(handle, inode);
- if (ret)
- goto out_stop;
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+ }
orphan = 1;
ei->i_disksize = inode->i_size;
+ ext4_journal_stop(handle);
}
}
@@ -1706,18 +1702,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
offset, nr_segs,
ext4_get_block, NULL);
- /*
- * Reacquire the handle: ext4_get_block() can restart the transaction
- */
- handle = ext4_journal_current_handle();
-
-out_stop:
- if (handle) {
+ if (orphan) {
int err;
- if (orphan && inode->i_nlink)
+ /* Credits for sb + inode write */
+ handle = ext4_journal_start(inode, 2);
+ if (IS_ERR(handle)) {
+ /* This is really bad luck. We've written the data
+ * but cannot extend i_size. Bail out and pretend
+ * the write failed... */
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+ if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- if (orphan && ret > 0) {
+ if (ret > 0) {
loff_t end = offset + ret;
if (end > inode->i_size) {
ei->i_disksize = end;
@@ -2758,13 +2757,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
- if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 &&
- EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
- /*
- * When mke2fs creates big inodes it does not zero out
- * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE,
- * so ignore those first few inodes.
- */
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 76e5fedc0a0..dd0fcfcb35c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -420,6 +420,7 @@
#define MB_DEFAULT_GROUP_PREALLOC 512
static struct kmem_cache *ext4_pspace_cachep;
+static struct kmem_cache *ext4_ac_cachep;
#ifdef EXT4_BB_MAX_BLOCKS
#undef EXT4_BB_MAX_BLOCKS
@@ -680,7 +681,6 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
char *bb;
- /* FIXME!! is this needed */
BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
BUG_ON(max == NULL);
@@ -964,7 +964,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments;
if (free != grp->bb_free) {
- printk(KERN_DEBUG
+ ext4_error(sb, __FUNCTION__,
"EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
group, free, grp->bb_free);
grp->bb_free = free;
@@ -1821,13 +1821,24 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
i = ext4_find_next_zero_bit(bitmap,
EXT4_BLOCKS_PER_GROUP(sb), i);
if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
- BUG_ON(free != 0);
+ /*
+ * IF we corrupt the bitmap we won't find any
+ * free blocks even though group info says we
+ * we have free blocks
+ */
+ ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+ "group info. But bitmap says 0\n",
+ free);
break;
}
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
- BUG_ON(free < ex.fe_len);
+ if (free < ex.fe_len) {
+ ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+ "group info. But got %d blocks\n",
+ free, ex.fe_len);
+ }
ext4_mb_measure_extent(ac, &ex, e4b);
@@ -2959,12 +2970,19 @@ int __init init_ext4_mballoc(void)
if (ext4_pspace_cachep == NULL)
return -ENOMEM;
+ ext4_ac_cachep =
+ kmem_cache_create("ext4_alloc_context",
+ sizeof(struct ext4_allocation_context),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ if (ext4_ac_cachep == NULL) {
+ kmem_cache_destroy(ext4_pspace_cachep);
+ return -ENOMEM;
+ }
#ifdef CONFIG_PROC_FS
proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs);
if (proc_root_ext4 == NULL)
printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT);
#endif
-
return 0;
}
@@ -2972,6 +2990,7 @@ void exit_ext4_mballoc(void)
{
/* XXX: synchronize_rcu(); */
kmem_cache_destroy(ext4_pspace_cachep);
+ kmem_cache_destroy(ext4_ac_cachep);
#ifdef CONFIG_PROC_FS
remove_proc_entry(EXT4_ROOT, proc_root_fs);
#endif
@@ -3069,7 +3088,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
out_err:
sb->s_dirt = 1;
- put_bh(bitmap_bh);
+ brelse(bitmap_bh);
return err;
}
@@ -3354,13 +3373,10 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
ac->ac_pa = pa;
/* we don't correct pa_pstart or pa_plen here to avoid
- * possible race when tte group is being loaded concurrently
+ * possible race when the group is being loaded concurrently
* instead we correct pa later, after blocks are marked
- * in on-disk bitmap -- see ext4_mb_release_context() */
- /*
- * FIXME!! but the other CPUs can look at this particular
- * pa and think that it have enought free blocks if we
- * don't update pa_free here right ?
+ * in on-disk bitmap -- see ext4_mb_release_context()
+ * Other CPUs are prevented from allocating from this pa by lg_mutex
*/
mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
}
@@ -3699,7 +3715,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
struct buffer_head *bitmap_bh,
struct ext4_prealloc_space *pa)
{
- struct ext4_allocation_context ac;
+ struct ext4_allocation_context *ac;
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long end;
@@ -3715,9 +3731,13 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
- ac.ac_sb = sb;
- ac.ac_inode = pa->pa_inode;
- ac.ac_op = EXT4_MB_HISTORY_DISCARD;
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ if (ac) {
+ ac->ac_sb = sb;
+ ac->ac_inode = pa->pa_inode;
+ ac->ac_op = EXT4_MB_HISTORY_DISCARD;
+ }
while (bit < end) {
bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit);
@@ -3733,24 +3753,28 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
(unsigned) group);
free += next - bit;
- ac.ac_b_ex.fe_group = group;
- ac.ac_b_ex.fe_start = bit;
- ac.ac_b_ex.fe_len = next - bit;
- ac.ac_b_ex.fe_logical = 0;
- ext4_mb_store_history(&ac);
+ if (ac) {
+ ac->ac_b_ex.fe_group = group;
+ ac->ac_b_ex.fe_start = bit;
+ ac->ac_b_ex.fe_len = next - bit;
+ ac->ac_b_ex.fe_logical = 0;
+ ext4_mb_store_history(ac);
+ }
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
if (free != pa->pa_free) {
- printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n",
+ printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart,
(unsigned long) pa->pa_len);
- printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free);
+ ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
+ free, pa->pa_free);
}
- BUG_ON(free != pa->pa_free);
atomic_add(free, &sbi->s_mb_discarded);
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
return err;
}
@@ -3758,12 +3782,15 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
struct ext4_prealloc_space *pa)
{
- struct ext4_allocation_context ac;
+ struct ext4_allocation_context *ac;
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
- ac.ac_op = EXT4_MB_HISTORY_DISCARD;
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+
+ if (ac)
+ ac->ac_op = EXT4_MB_HISTORY_DISCARD;
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
@@ -3771,13 +3798,16 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
- ac.ac_sb = sb;
- ac.ac_inode = NULL;
- ac.ac_b_ex.fe_group = group;
- ac.ac_b_ex.fe_start = bit;
- ac.ac_b_ex.fe_len = pa->pa_len;
- ac.ac_b_ex.fe_logical = 0;
- ext4_mb_store_history(&ac);
+ if (ac) {
+ ac->ac_sb = sb;
+ ac->ac_inode = NULL;
+ ac->ac_b_ex.fe_group = group;
+ ac->ac_b_ex.fe_start = bit;
+ ac->ac_b_ex.fe_len = pa->pa_len;
+ ac->ac_b_ex.fe_logical = 0;
+ ext4_mb_store_history(ac);
+ kmem_cache_free(ext4_ac_cachep, ac);
+ }
return 0;
}
@@ -4231,7 +4261,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
struct ext4_allocation_request *ar, int *errp)
{
- struct ext4_allocation_context ac;
+ struct ext4_allocation_context *ac = NULL;
struct ext4_sb_info *sbi;
struct super_block *sb;
ext4_fsblk_t block = 0;
@@ -4257,53 +4287,60 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}
inquota = ar->len;
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+ if (!ac) {
+ *errp = -ENOMEM;
+ return 0;
+ }
+
ext4_mb_poll_new_transaction(sb, handle);
- *errp = ext4_mb_initialize_context(&ac, ar);
+ *errp = ext4_mb_initialize_context(ac, ar);
if (*errp) {
ar->len = 0;
goto out;
}
- ac.ac_op = EXT4_MB_HISTORY_PREALLOC;
- if (!ext4_mb_use_preallocated(&ac)) {
+ ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
+ if (!ext4_mb_use_preallocated(ac)) {
- ac.ac_op = EXT4_MB_HISTORY_ALLOC;
- ext4_mb_normalize_request(&ac, ar);
+ ac->ac_op = EXT4_MB_HISTORY_ALLOC;
+ ext4_mb_normalize_request(ac, ar);
repeat:
/* allocate space in core */
- ext4_mb_regular_allocator(&ac);
+ ext4_mb_regular_allocator(ac);
/* as we've just preallocated more space than
* user requested orinally, we store allocated
* space in a special descriptor */
- if (ac.ac_status == AC_STATUS_FOUND &&
- ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len)
- ext4_mb_new_preallocation(&ac);
+ if (ac->ac_status == AC_STATUS_FOUND &&
+ ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
+ ext4_mb_new_preallocation(ac);
}
- if (likely(ac.ac_status == AC_STATUS_FOUND)) {
- ext4_mb_mark_diskspace_used(&ac, handle);
+ if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+ ext4_mb_mark_diskspace_used(ac, handle);
*errp = 0;
- block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex);
- ar->len = ac.ac_b_ex.fe_len;
+ block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+ ar->len = ac->ac_b_ex.fe_len;
} else {
- freed = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len);
+ freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
if (freed)
goto repeat;
*errp = -ENOSPC;
- ac.ac_b_ex.fe_len = 0;
+ ac->ac_b_ex.fe_len = 0;
ar->len = 0;
- ext4_mb_show_ac(&ac);
+ ext4_mb_show_ac(ac);
}
- ext4_mb_release_context(&ac);
+ ext4_mb_release_context(ac);
out:
if (ar->len < inquota)
DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
+ kmem_cache_free(ext4_ac_cachep, ac);
return block;
}
static void ext4_mb_poll_new_transaction(struct super_block *sb,
@@ -4405,9 +4442,9 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
unsigned long block, unsigned long count,
int metadata, unsigned long *freed)
{
- struct buffer_head *bitmap_bh = 0;
+ struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
- struct ext4_allocation_context ac;
+ struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
struct ext4_super_block *es;
unsigned long overflow;
@@ -4436,9 +4473,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
ext4_debug("freeing block %lu\n", block);
- ac.ac_op = EXT4_MB_HISTORY_FREE;
- ac.ac_inode = inode;
- ac.ac_sb = sb;
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+ if (ac) {
+ ac->ac_op = EXT4_MB_HISTORY_FREE;
+ ac->ac_inode = inode;
+ ac->ac_sb = sb;
+ }
do_more:
overflow = 0;
@@ -4504,10 +4544,12 @@ do_more:
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
err = ext4_journal_dirty_metadata(handle, bitmap_bh);
- ac.ac_b_ex.fe_group = block_group;
- ac.ac_b_ex.fe_start = bit;
- ac.ac_b_ex.fe_len = count;
- ext4_mb_store_history(&ac);
+ if (ac) {
+ ac->ac_b_ex.fe_group = block_group;
+ ac->ac_b_ex.fe_start = bit;
+ ac->ac_b_ex.fe_len = count;
+ ext4_mb_store_history(ac);
+ }
if (metadata) {
/* blocks being freed are metadata. these blocks shouldn't
@@ -4548,5 +4590,7 @@ do_more:
error_return:
brelse(bitmap_bh);
ext4_std_error(sb, err);
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
return;
}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 3ebc2332f52..8c6c685b9d2 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
- }
- if (needed) {
+ } else if (needed) {
retval = ext4_journal_extend(handle, needed);
- if (retval != 0) {
+ if (retval) {
/*
* IF not able to extend the journal restart the journal
*/
@@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
}
+static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
+{
+ int retval = 0, needed;
+
+ if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ return 0;
+ /*
+ * We are freeing a blocks. During this we touch
+ * superblock, group descriptor and block bitmap.
+ * So allocate a credit of 3. We may update
+ * quota (user and group).
+ */
+ needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+
+ if (ext4_journal_extend(handle, needed) != 0)
+ retval = ext4_journal_restart(handle, needed);
+
+ return retval;
+}
+
static int free_dind_blocks(handle_t *handle,
struct inode *inode, __le32 i_data)
{
@@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle,
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
- if (tmp_idata[i])
+ if (tmp_idata[i]) {
+ extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode,
le32_to_cpu(tmp_idata[i]), 1, 1);
+ }
}
put_bh(bh);
+ extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
return 0;
}
@@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle,
}
}
put_bh(bh);
+ extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
return 0;
}
-static int free_ind_block(handle_t *handle, struct inode *inode)
+static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
{
int retval;
- struct ext4_inode_info *ei = EXT4_I(inode);
- if (ei->i_data[EXT4_IND_BLOCK])
+ /* ei->i_data[EXT4_IND_BLOCK] */
+ if (i_data[0]) {
+ extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode,
- le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
+ le32_to_cpu(i_data[0]), 1, 1);
+ }
- if (ei->i_data[EXT4_DIND_BLOCK]) {
- retval = free_dind_blocks(handle, inode,
- ei->i_data[EXT4_DIND_BLOCK]);
+ /* ei->i_data[EXT4_DIND_BLOCK] */
+ if (i_data[1]) {
+ retval = free_dind_blocks(handle, inode, i_data[1]);
if (retval)
return retval;
}
- if (ei->i_data[EXT4_TIND_BLOCK]) {
- retval = free_tind_blocks(handle, inode,
- ei->i_data[EXT4_TIND_BLOCK]);
+ /* ei->i_data[EXT4_TIND_BLOCK] */
+ if (i_data[2]) {
+ retval = free_tind_blocks(handle, inode, i_data[2]);
if (retval)
return retval;
}
@@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
}
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
- struct inode *tmp_inode, int retval)
+ struct inode *tmp_inode)
{
+ int retval;
+ __le32 i_data[3];
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
- retval = free_ind_block(handle, inode);
- if (retval)
- goto err_out;
-
/*
* One credit accounted for writing the
* i_data field of the original inode
@@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
goto err_out;
}
+ i_data[0] = ei->i_data[EXT4_IND_BLOCK];
+ i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
+ i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
+
+ down_write(&EXT4_I(inode)->i_data_sem);
/*
* We have the extent map build with the tmp inode.
* Now copy the i_data across
@@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
spin_lock(&inode->i_lock);
inode->i_blocks += tmp_inode->i_blocks;
spin_unlock(&inode->i_lock);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ /*
+ * We mark the inode dirty after, because we decrement the
+ * i_blocks when freeing the indirect meta-data blocks
+ */
+ retval = free_ind_block(handle, inode, i_data);
ext4_mark_inode_dirty(handle, inode);
+
err_out:
return retval;
}
@@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
}
}
put_bh(bh);
+ extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, block, 1, 1);
return retval;
}
@@ -414,7 +450,12 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
return -EINVAL;
- down_write(&EXT4_I(inode)->i_data_sem);
+ if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
+ /*
+ * don't migrate fast symlink
+ */
+ return retval;
+
handle = ext4_journal_start(inode,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -448,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
ext4_orphan_add(handle, tmp_inode);
ext4_journal_stop(handle);
- ei = EXT4_I(inode);
- i_data = ei->i_data;
- memset(&lb, 0, sizeof(lb));
-
- /* 32 bit block address 4 bytes */
- max_entries = inode->i_sb->s_blocksize >> 2;
-
/*
* start with one credit accounted for
* superblock modification.
@@ -463,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
* trascation that created the inode. Later as and
* when we add extents we extent the journal
*/
+ /*
+ * inode_mutex prevent write and truncate on the file. Read still goes
+ * through. We take i_data_sem in ext4_ext_swap_inode_data before we
+ * switch the inode format to prevent read.
+ */
+ mutex_lock(&(inode->i_mutex));
handle = ext4_journal_start(inode, 1);
+
+ ei = EXT4_I(inode);
+ i_data = ei->i_data;
+ memset(&lb, 0, sizeof(lb));
+
+ /* 32 bit block address 4 bytes */
+ max_entries = inode->i_sb->s_blocksize >> 2;
for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
if (i_data[i]) {
retval = update_extent_range(handle, tmp_inode,
@@ -501,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
*/
retval = finish_range(handle, tmp_inode, &lb);
err_out:
- /*
- * We are either freeing extent information or indirect
- * blocks. During this we touch superblock, group descriptor
- * and block bitmap. Later we mark the tmp_inode dirty
- * via ext4_ext_tree_init. So allocate a credit of 4
- * We may update quota (user and group).
- *
- * FIXME!! we may be touching bitmaps in different block groups.
- */
- if (ext4_journal_extend(handle,
- 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
- ext4_journal_restart(handle,
- 4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
if (retval)
/*
* Failure case delete the extent information with the
@@ -522,7 +556,11 @@ err_out:
free_ext_block(handle, tmp_inode);
else
retval = ext4_ext_swap_inode_data(handle, inode,
- tmp_inode, retval);
+ tmp_inode);
+
+ /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
+ if (ext4_journal_extend(handle, 1) != 0)
+ ext4_journal_restart(handle, 1);
/*
* Mark the tmp_inode as of size zero
@@ -550,8 +588,7 @@ err_out:
tmp_inode->i_nlink = 0;
ext4_journal_stop(handle);
-
- up_write(&EXT4_I(inode)->i_data_sem);
+ mutex_unlock(&(inode->i_mutex));
if (tmp_inode)
iput(tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index d153bb5922f..a9347fb43bc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2223,6 +2223,7 @@ retry:
inode->i_op = &ext4_fast_symlink_inode_operations;
memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
inode->i_size = l-1;
+ EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
}
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 93beb865c20..0072da75221 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1919,6 +1919,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
printk(KERN_WARNING
"EXT4-fs warning: feature flags set on rev 0 fs, "
"running e2fsck is recommended\n");
+
+ /*
+ * Since ext4 is still considered development code, we require
+ * that the TEST_FILESYS flag in s->flags be set.
+ */
+ if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
+ printk(KERN_WARNING "EXT4-fs: %s: not marked "
+ "OK to use with test code.\n", sb->s_id);
+ goto failed_mount;
+ }
+
/*
* Check feature flags regardless of the revision level, since we
* previously didn't change the revision level when setting the flags,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 8e08efcaede..a38c7186c57 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -104,7 +104,8 @@ static int journal_write_commit_record(journal_t *journal,
{
struct journal_head *descriptor;
struct buffer_head *bh;
- int i, ret;
+ journal_header_t *header;
+ int ret;
int barrier_done = 0;
if (is_journal_aborted(journal))
@@ -116,13 +117,10 @@ static int journal_write_commit_record(journal_t *journal,
bh = jh2bh(descriptor);
- /* AKPM: buglet - add `i' to tmp! */
- for (i = 0; i < bh->b_size; i += 512) {
- journal_header_t *tmp = (journal_header_t*)bh->b_data;
- tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
- tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
- tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
- }
+ header = (journal_header_t *)(bh->b_data);
+ header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+ header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+ header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
JBUFFER_TRACE(descriptor, "write commit block");
set_buffer_dirty(bh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 4f302d27927..a8173081f83 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -136,18 +136,20 @@ static int journal_submit_commit_record(journal_t *journal,
JBUFFER_TRACE(descriptor, "submit commit block");
lock_buffer(bh);
-
+ get_bh(bh);
set_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
if (journal->j_flags & JBD2_BARRIER &&
- !JBD2_HAS_COMPAT_FEATURE(journal,
+ !JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
set_buffer_ordered(bh);
barrier_done = 1;
}
ret = submit_bh(WRITE, bh);
+ if (barrier_done)
+ clear_buffer_ordered(bh);
/* is it possible for another commit to fail at roughly
* the same time as this one? If so, we don't want to
@@ -166,7 +168,6 @@ static int journal_submit_commit_record(journal_t *journal,
spin_unlock(&journal->j_state_lock);
/* And try again, without the barrier */
- clear_buffer_ordered(bh);
set_buffer_uptodate(bh);
set_buffer_dirty(bh);
ret = submit_bh(WRITE, bh);
@@ -872,7 +873,8 @@ wait_for_iobuf:
if (err)
__jbd2_journal_abort_hard(journal);
}
- err = journal_wait_on_commit_record(cbh);
+ if (!err && !is_journal_aborted(journal))
+ err = journal_wait_on_commit_record(cbh);
if (err)
jbd2_journal_abort(journal, err);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index d36356f7d22..146411387ad 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -641,7 +641,7 @@ static int do_one_pass(journal_t *journal,
if (chksum_err) {
info->end_transaction = next_commit_ID;
- if (!JBD2_HAS_COMPAT_FEATURE(journal,
+ if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
printk(KERN_ERR
"JBD: Transaction %u "
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index c4f635a4dd2..25003254859 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -490,6 +490,13 @@ do { \
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
/*
+ * Misc. filesystem flags
+ */
+#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
+#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
+#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
+
+/*
* Mount flags
*/
#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */