diff options
-rw-r--r-- | fs/ext4/dir.c | 2 | ||||
-rw-r--r-- | fs/ext4/extents.c | 59 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 22 | ||||
-rw-r--r-- | fs/ext4/inode.c | 56 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 80 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 5 | ||||
-rw-r--r-- | fs/ext4/namei.c | 18 | ||||
-rw-r--r-- | fs/ext4/resize.c | 1 | ||||
-rw-r--r-- | include/linux/ext4_fs_extents.h | 1 |
9 files changed, 177 insertions, 67 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 33888bb5814..2c23bade9aa 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -46,7 +46,7 @@ const struct file_operations ext4_dir_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif - .fsync = ext4_sync_file, /* BKL held */ + .fsync = ext4_sync_file, .release = ext4_release_dir, }; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc7081f1fbe..9ae6e67090c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -148,6 +148,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, { struct ext4_inode_info *ei = EXT4_I(inode); ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; ext4_grpblk_t colour; int depth; @@ -169,8 +170,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, /* OK. use inode's group */ bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); - colour = (current->pid % 16) * + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); return bg_start + colour + block; } @@ -349,7 +355,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) #define ext4_ext_show_leaf(inode,path) #endif -static void ext4_ext_drop_refs(struct ext4_ext_path *path) +void ext4_ext_drop_refs(struct ext4_ext_path *path) { int depth = path->p_depth; int i; @@ -2168,6 +2174,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, newblock = iblock - ee_block + ext_pblock(ex); ex2 = ex; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + /* ex1: ee_block to iblock - 1 : uninitialized */ if (iblock > ee_block) { ex1 = ex; @@ -2200,16 +2210,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, newdepth = ext_depth(inode); if (newdepth != depth) { depth = newdepth; - path = ext4_ext_find_extent(inode, iblock, NULL); + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, iblock, path); if (IS_ERR(path)) { err = PTR_ERR(path); - path = NULL; goto out; } eh = path[depth].p_hdr; ex = path[depth].p_ext; if (ex2 != &newex) ex2 = ex; + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; } allocated = max_blocks; } @@ -2230,9 +2244,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex2->ee_len = cpu_to_le16(allocated); if (ex2 != ex) goto insert; - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; /* * New (initialized) extent starts from the first block * in the current extent. i.e., ex2 == ex @@ -2276,9 +2287,22 @@ out: } /* + * Block allocation/map/preallocation routine for extents based files + * + * * Need to be called with * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) + * + * return > 0, number of of blocks already mapped/allocated + * if create == 0 and these are pre-allocated blocks + * buffer head is unmapped + * otherwise blocks are mapped + * + * return = 0, if plain look up failed (blocks have not been allocated) + * buffer head is unmapped + * + * return < 0, error case. */ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, @@ -2623,7 +2647,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) * modify 1 super block, 1 block bitmap and 1 group descriptor. */ credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; - down_write((&EXT4_I(inode)->i_data_sem)); + mutex_lock(&inode->i_mutex); retry: while (ret >= 0 && ret < max_blocks) { block = block + ret; @@ -2634,16 +2658,17 @@ retry: break; } - ret = ext4_ext_get_blocks(handle, inode, block, + ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0); - WARN_ON(ret <= 0); if (ret <= 0) { - ext4_error(inode->i_sb, "ext4_fallocate", - "ext4_ext_get_blocks returned error: " - "inode#%lu, block=%u, max_blocks=%lu", +#ifdef EXT4FS_DEBUG + WARN_ON(ret <= 0); + printk(KERN_ERR "%s: ext4_ext_get_blocks " + "returned error inode#%lu, block=%u, " + "max_blocks=%lu", __func__, inode->i_ino, block, max_blocks); - ret = -EIO; +#endif ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); break; @@ -2680,7 +2705,6 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - up_write((&EXT4_I(inode)->i_data_sem)); /* * Time to update the file size. * Update only when preallocation was requested beyond the file size. @@ -2692,21 +2716,18 @@ retry: * if no error, we assume preallocation succeeded * completely */ - mutex_lock(&inode->i_mutex); i_size_write(inode, offset + len); EXT4_I(inode)->i_disksize = i_size_read(inode); - mutex_unlock(&inode->i_mutex); } else if (ret < 0 && nblocks) { /* Handle partial allocation scenario */ loff_t newsize; - mutex_lock(&inode->i_mutex); newsize = (nblocks << blkbits) + i_size_read(inode); i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); EXT4_I(inode)->i_disksize = i_size_read(inode); - mutex_unlock(&inode->i_mutex); } } + mutex_unlock(&inode->i_mutex); return ret > 0 ? ret2 : ret; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index da18a74b966..8036b9b5376 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -702,7 +702,12 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL; + /* + * Don't inherit extent flag from directory. We set extent flag on + * newly created directory and file only if -o extent mount option is + * specified + */ + ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); /* dirsync only applies to directories */ @@ -745,12 +750,15 @@ got: goto fail_free_drop; } if (test_opt(sb, EXTENTS)) { - EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; - ext4_ext_tree_init(handle, inode); - err = ext4_update_incompat_feature(handle, sb, - EXT4_FEATURE_INCOMPAT_EXTENTS); - if (err) - goto fail; + /* set extent flag only for directory and file */ + if (S_ISDIR(mode) || S_ISREG(mode)) { + EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; + ext4_ext_tree_init(handle, inode); + err = ext4_update_incompat_feature(handle, sb, + EXT4_FEATURE_INCOMPAT_EXTENTS); + if (err) + goto fail; + } } ext4_debug("allocating inode %lu\n", inode->i_ino); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7dd9b50d5eb..945cbf6cb1f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -403,6 +403,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; __le32 *p; ext4_fsblk_t bg_start; + ext4_fsblk_t last_block; ext4_grpblk_t colour; /* Try to find previous block */ @@ -420,8 +421,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * into the same cylinder group then. */ bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); - colour = (current->pid % 16) * + last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; + + if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) + colour = (current->pid % 16) * (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + else + colour = (current->pid % 16) * ((last_block - bg_start) / 16); return bg_start + colour; } @@ -768,7 +774,6 @@ err_out: * * `handle' can be NULL if create == 0. * - * The BKL may not be held on entry here. Be sure to take it early. * return > 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return < 0, error case. @@ -903,11 +908,38 @@ out: */ #define DIO_CREDITS 25 + +/* + * + * + * ext4_ext4 get_block() wrapper function + * It will do a look up first, and returns if the blocks already mapped. + * Otherwise it takes the write lock of the i_data_sem and allocate blocks + * and store the allocated blocks in the result buffer head and mark it + * mapped. + * + * If file type is extents based, it will call ext4_ext_get_blocks(), + * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping + * based files + * + * On success, it returns the number of blocks being mapped or allocate. + * if create==0 and the blocks are pre-allocated and uninitialized block, + * the result buffer head is unmapped. If the create ==1, it will make sure + * the buffer head is mapped. + * + * It returns 0 if plain look up failed (blocks have not been allocated), in + * that casem, buffer head is unmapped + * + * It returns the error in case of allocation failure. + */ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, int create, int extend_disksize) { int retval; + + clear_buffer_mapped(bh); + /* * Try to see if we can get the block without requesting * for new file system block. @@ -921,12 +953,26 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, inode, block, max_blocks, bh, 0, 0); } up_read((&EXT4_I(inode)->i_data_sem)); - if (!create || (retval > 0)) + + /* If it is only a block(s) look up */ + if (!create) + return retval; + + /* + * Returns if the blocks have already allocated + * + * Note that if blocks have been preallocated + * ext4_ext_get_block() returns th create = 0 + * with buffer head unmapped. + */ + if (retval > 0 && buffer_mapped(bh)) return retval; /* - * We need to allocate new blocks which will result - * in i_data update + * New blocks allocate and/or writing to uninitialized extent + * will possibly result in updating i_data, so we take + * the write lock of i_data_sem, and call get_blocks() + * with create == 1 flag. */ down_write((&EXT4_I(inode)->i_data_sem)); /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dd0fcfcb35c..ef97f19c2f9 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -627,21 +627,19 @@ static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, return block; } +static inline void *mb_correct_addr_and_bit(int *bit, void *addr) +{ #if BITS_PER_LONG == 64 -#define mb_correct_addr_and_bit(bit, addr) \ -{ \ - bit += ((unsigned long) addr & 7UL) << 3; \ - addr = (void *) ((unsigned long) addr & ~7UL); \ -} + *bit += ((unsigned long) addr & 7UL) << 3; + addr = (void *) ((unsigned long) addr & ~7UL); #elif BITS_PER_LONG == 32 -#define mb_correct_addr_and_bit(bit, addr) \ -{ \ - bit += ((unsigned long) addr & 3UL) << 3; \ - addr = (void *) ((unsigned long) addr & ~3UL); \ -} + *bit += ((unsigned long) addr & 3UL) << 3; + addr = (void *) ((unsigned long) addr & ~3UL); #else #error "how many bits you are?!" #endif + return addr; +} static inline int mb_test_bit(int bit, void *addr) { @@ -649,34 +647,54 @@ static inline int mb_test_bit(int bit, void *addr) * ext4_test_bit on architecture like powerpc * needs unsigned long aligned address */ - mb_correct_addr_and_bit(bit, addr); + addr = mb_correct_addr_and_bit(&bit, addr); return ext4_test_bit(bit, addr); } static inline void mb_set_bit(int bit, void *addr) { - mb_correct_addr_and_bit(bit, addr); + addr = mb_correct_addr_and_bit(&bit, addr); ext4_set_bit(bit, addr); } static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) { - mb_correct_addr_and_bit(bit, addr); + addr = mb_correct_addr_and_bit(&bit, addr); ext4_set_bit_atomic(lock, bit, addr); } static inline void mb_clear_bit(int bit, void *addr) { - mb_correct_addr_and_bit(bit, addr); + addr = mb_correct_addr_and_bit(&bit, addr); ext4_clear_bit(bit, addr); } static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) { - mb_correct_addr_and_bit(bit, addr); + addr = mb_correct_addr_and_bit(&bit, addr); ext4_clear_bit_atomic(lock, bit, addr); } +static inline int mb_find_next_zero_bit(void *addr, int max, int start) +{ + int fix = 0; + addr = mb_correct_addr_and_bit(&fix, addr); + max += fix; + start += fix; + + return ext4_find_next_zero_bit(addr, max, start) - fix; +} + +static inline int mb_find_next_bit(void *addr, int max, int start) +{ + int fix = 0; + addr = mb_correct_addr_and_bit(&fix, addr); + max += fix; + start += fix; + + return ext4_find_next_bit(addr, max, start) - fix; +} + static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) { char *bb; @@ -906,7 +924,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, unsigned short chunk; unsigned short border; - BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb)); + BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); border = 2 << sb->s_blocksize_bits; @@ -946,12 +964,12 @@ static void ext4_mb_generate_buddy(struct super_block *sb, /* initialize buddy from bitmap which is aggregation * of on-disk bitmap and preallocations */ - i = ext4_find_next_zero_bit(bitmap, max, 0); + i = mb_find_next_zero_bit(bitmap, max, 0); grp->bb_first_free = i; while (i < max) { fragments++; first = i; - i = ext4_find_next_bit(bitmap, max, i); + i = mb_find_next_bit(bitmap, max, i); len = i - first; free += len; if (len > 1) @@ -959,7 +977,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, else grp->bb_counters[0]++; if (i < max) - i = ext4_find_next_zero_bit(bitmap, max, i); + i = mb_find_next_zero_bit(bitmap, max, i); } grp->bb_fragments = fragments; @@ -967,6 +985,10 @@ static void ext4_mb_generate_buddy(struct super_block *sb, ext4_error(sb, __FUNCTION__, "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", group, free, grp->bb_free); + /* + * If we intent to continue, we consider group descritor + * corrupt and update bb_free using bitmap value + */ grp->bb_free = free; } @@ -1778,7 +1800,7 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, buddy = mb_find_buddy(e4b, i, &max); BUG_ON(buddy == NULL); - k = ext4_find_next_zero_bit(buddy, max, 0); + k = mb_find_next_zero_bit(buddy, max, 0); BUG_ON(k >= max); ac->ac_found++; @@ -1818,11 +1840,11 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, i = e4b->bd_info->bb_first_free; while (free && ac->ac_status == AC_STATUS_CONTINUE) { - i = ext4_find_next_zero_bit(bitmap, + i = mb_find_next_zero_bit(bitmap, EXT4_BLOCKS_PER_GROUP(sb), i); if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { /* - * IF we corrupt the bitmap we won't find any + * IF we have corrupt bitmap, we won't find any * free blocks even though group info says we * we have free blocks */ @@ -1838,6 +1860,12 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, ext4_error(sb, __FUNCTION__, "%d free blocks as per " "group info. But got %d blocks\n", free, ex.fe_len); + /* + * The number of free blocks differs. This mostly + * indicate that the bitmap is corrupt. So exit + * without claiming the space. + */ + break; } ext4_mb_measure_extent(ac, &ex, e4b); @@ -3740,10 +3768,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, } while (bit < end) { - bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); + bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit); if (bit >= end) break; - next = ext4_find_next_bit(bitmap_bh->b_data, end, bit); + next = mb_find_next_bit(bitmap_bh->b_data, end, bit); if (next > end) next = end; start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + @@ -3771,6 +3799,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, (unsigned long) pa->pa_len); ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", free, pa->pa_free); + /* + * pa is already deleted so we use the value obtained + * from the bitmap and continue. + */ } atomic_add(free, &sbi->s_mb_discarded); if (ac) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 8c6c685b9d2..5c1e27de775 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode, if (IS_ERR(path)) { retval = PTR_ERR(path); + path = NULL; goto err_out; } @@ -74,6 +75,10 @@ static int finish_range(handle_t *handle, struct inode *inode, } retval = ext4_ext_insert_extent(handle, inode, path, &newext); err_out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } lb->first_pblock = 0; return retval; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a9347fb43bc..28aa2ed4297 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1804,12 +1804,8 @@ retry: inode->i_fop = &ext4_dir_operations; inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; dir_block = ext4_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - ext4_dec_count(handle, inode); /* is this nlink == 0? */ - ext4_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; - } + if (!dir_block) + goto out_clear_inode; BUFFER_TRACE(dir_block, "get_write_access"); ext4_journal_get_write_access(handle, dir_block); de = (struct ext4_dir_entry_2 *) dir_block->b_data; @@ -1832,7 +1828,8 @@ retry: ext4_mark_inode_dirty(handle, inode); err = ext4_add_entry (handle, dentry, inode); if (err) { - inode->i_nlink = 0; +out_clear_inode: + clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2164,7 +2161,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - ext4_dec_count(handle, inode); + drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = ext4_current_time(inode); @@ -2214,7 +2211,7 @@ retry: err = __page_symlink(inode, symname, l, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { - ext4_dec_count(handle, inode); + clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2223,7 +2220,6 @@ retry: inode->i_op = &ext4_fast_symlink_inode_operations; memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; - EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; } EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_add_nondir(handle, dentry, inode); @@ -2407,7 +2403,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, ext4_dec_count(handle, old_dir); if (new_inode) { /* checked empty_dir above, can't have another parent, - * ext3_dec_count() won't work for many-linked dirs */ + * ext4_dec_count() won't work for many-linked dirs */ new_inode->i_nlink = 0; } else { ext4_inc_count(handle, new_dir); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9477a2bd6ff..e29efa0f9d6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1037,6 +1037,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); unlock_super(sb); + ext4_journal_stop(handle); err = -EBUSY; goto exit_put; } diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 697da4bce6c..1285c583b2d 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -227,5 +227,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); +extern void ext4_ext_drop_refs(struct ext4_ext_path *); #endif /* _LINUX_EXT4_EXTENTS */ |