aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-11-10 09:10:44 +0100
committerIngo Molnar <mingo@elte.hu>2008-11-10 09:10:44 +0100
commita5a64498c194c82ecad3a2d67cff6231cda8d3dd (patch)
tree723d5d81419f9960b8d30ed9a2ece8a58d6c4328 /fs/ext4
parentbb93d802ae5c1949977cc6da247b218240677f11 (diff)
parentf7160c7573615ec82c691e294cf80d920b5d588d (diff)
Merge commit 'v2.6.28-rc4' into timers/rtc
Conflicts: drivers/rtc/rtc-cmos.c
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig79
-rw-r--r--fs/ext4/balloc.c77
-rw-r--r--fs/ext4/dir.c20
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/mballoc.c1
-rw-r--r--fs/ext4/namei.c12
-rw-r--r--fs/ext4/super.c63
9 files changed, 155 insertions, 109 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
new file mode 100644
index 00000000000..7505482a08f
--- /dev/null
+++ b/fs/ext4/Kconfig
@@ -0,0 +1,79 @@
+config EXT4_FS
+ tristate "The Extended 4 (ext4) filesystem"
+ select JBD2
+ select CRC16
+ help
+ This is the next generation of the ext3 filesystem.
+
+ Unlike the change from ext2 filesystem to ext3 filesystem,
+ the on-disk format of ext4 is not forwards compatible with
+ ext3; it is based on extent maps and it supports 48-bit
+ physical block numbers. The ext4 filesystem also supports delayed
+ allocation, persistent preallocation, high resolution time stamps,
+ and a number of other features to improve performance and speed
+ up fsck time. For more information, please see the web pages at
+ http://ext4.wiki.kernel.org.
+
+ The ext4 filesystem will support mounting an ext3
+ filesystem; while there will be some performance gains from
+ the delayed allocation and inode table readahead, the best
+ performance gains will require enabling ext4 features in the
+ filesystem, or formating a new filesystem as an ext4
+ filesystem initially.
+
+ To compile this file system support as a module, choose M here. The
+ module will be called ext4.
+
+ If unsure, say N.
+
+config EXT4DEV_COMPAT
+ bool "Enable ext4dev compatibility"
+ depends on EXT4_FS
+ help
+ Starting with 2.6.28, the name of the ext4 filesystem was
+ renamed from ext4dev to ext4. Unfortunately there are some
+ legacy userspace programs (such as klibc's fstype) have
+ "ext4dev" hardcoded.
+
+ To enable backwards compatibility so that systems that are
+ still expecting to mount ext4 filesystems using ext4dev,
+ chose Y here. This feature will go away by 2.6.31, so
+ please arrange to get your userspace programs fixed!
+
+config EXT4_FS_XATTR
+ bool "Ext4 extended attributes"
+ depends on EXT4_FS
+ default y
+ help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+ You need this for POSIX ACL support on ext4.
+
+config EXT4_FS_POSIX_ACL
+ bool "Ext4 POSIX Access Control Lists"
+ depends on EXT4_FS_XATTR
+ select FS_POSIX_ACL
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N
+
+config EXT4_FS_SECURITY
+ bool "Ext4 Security Labels"
+ depends on EXT4_FS_XATTR
+ help
+ Security labels support alternative access control models
+ implemented by security modules like SELinux. This option
+ enables an extended attribute handler for file security
+ labels in the ext4 filesystem.
+
+ If you are not using a security module that requires using
+ extended attributes for file security labels, say N.
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b9821be709b..d2003cdc36a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -589,21 +589,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
return;
}
-int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
- s64 nblocks)
+/**
+ * ext4_has_free_blocks()
+ * @sbi: in-core super block structure.
+ * @nblocks: number of needed blocks
+ *
+ * Check if filesystem has nblocks free & available for allocation.
+ * On success return 1, return 0 on failure.
+ */
+int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
{
- s64 free_blocks, dirty_blocks;
- s64 root_blocks = 0;
+ s64 free_blocks, dirty_blocks, root_blocks;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
free_blocks = percpu_counter_read_positive(fbc);
dirty_blocks = percpu_counter_read_positive(dbc);
-
- if (!capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current->fsuid &&
- (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
- root_blocks = ext4_r_blocks_count(sbi->s_es);
+ root_blocks = ext4_r_blocks_count(sbi->s_es);
if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
EXT4_FREEBLOCKS_WATERMARK) {
@@ -616,57 +618,32 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
}
}
/* Check whether we have space after
- * accounting for current dirty blocks
+ * accounting for current dirty blocks & root reserved blocks.
*/
- if (free_blocks < ((root_blocks + nblocks) + dirty_blocks))
- /* we don't have free space */
- return -ENOSPC;
+ if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
+ return 1;
+
+ /* Hm, nope. Are (enough) root reserved blocks available? */
+ if (sbi->s_resuid == current->fsuid ||
+ ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
+ capable(CAP_SYS_RESOURCE)) {
+ if (free_blocks >= (nblocks + dirty_blocks))
+ return 1;
+ }
- /* Add the blocks to nblocks */
- percpu_counter_add(dbc, nblocks);
return 0;
}
-/**
- * ext4_has_free_blocks()
- * @sbi: in-core super block structure.
- * @nblocks: number of neeed blocks
- *
- * Check if filesystem has free blocks available for allocation.
- * Return the number of blocks avaible for allocation for this request
- * On success, return nblocks
- */
-ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
s64 nblocks)
{
- s64 free_blocks, dirty_blocks;
- s64 root_blocks = 0;
- struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
- struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
-
- free_blocks = percpu_counter_read_positive(fbc);
- dirty_blocks = percpu_counter_read_positive(dbc);
-
- if (!capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current->fsuid &&
- (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
- root_blocks = ext4_r_blocks_count(sbi->s_es);
-
- if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
- EXT4_FREEBLOCKS_WATERMARK) {
- free_blocks = percpu_counter_sum(fbc);
- dirty_blocks = percpu_counter_sum(dbc);
- }
- if (free_blocks <= (root_blocks + dirty_blocks))
- /* we don't have free space */
+ if (ext4_has_free_blocks(sbi, nblocks)) {
+ percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
return 0;
-
- if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
- return free_blocks - (root_blocks + dirty_blocks);
- return nblocks;
+ } else
+ return -ENOSPC;
}
-
/**
* ext4_should_retry_alloc()
* @sb: super block
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3ca6a2b7632..fed5b610df5 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -459,17 +459,8 @@ static int ext4_dx_readdir(struct file *filp,
if (info->extra_fname) {
if (call_filldir(filp, dirent, filldir, info->extra_fname))
goto finished;
-
info->extra_fname = NULL;
- info->curr_node = rb_next(info->curr_node);
- if (!info->curr_node) {
- if (info->next_hash == ~0) {
- filp->f_pos = EXT4_HTREE_EOF;
- goto finished;
- }
- info->curr_hash = info->next_hash;
- info->curr_minor_hash = 0;
- }
+ goto next_node;
} else if (!info->curr_node)
info->curr_node = rb_first(&info->root);
@@ -501,9 +492,14 @@ static int ext4_dx_readdir(struct file *filp,
info->curr_minor_hash = fname->minor_hash;
if (call_filldir(filp, dirent, filldir, fname))
break;
-
+ next_node:
info->curr_node = rb_next(info->curr_node);
- if (!info->curr_node) {
+ if (info->curr_node) {
+ fname = rb_entry(info->curr_node, struct fname,
+ rb_hash);
+ info->curr_hash = fname->hash;
+ info->curr_minor_hash = fname->minor_hash;
+ } else {
if (info->next_hash == ~0) {
filp->f_pos = EXT4_HTREE_EOF;
break;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4880cc3e672..b0537c82702 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1003,8 +1003,7 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
- s64 nblocks);
+extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count, int metadata);
extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fe34d74cfb1..2a117e286e5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -718,6 +718,8 @@ got:
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
free = ext4_free_blocks_after_init(sb, group, gdp);
gdp->bg_free_blocks_count = cpu_to_le16(free);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
+ gdp);
}
spin_unlock(sb_bgl_lock(sbi, group));
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dbf6953845..be21a5ae33c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2329,6 +2329,8 @@ static int ext4_da_writepage(struct page *page,
unlock_page(page);
return 0;
}
+ /* now mark the buffer_heads as dirty and uptodate */
+ block_commit_write(page, 0, PAGE_CACHE_SIZE);
}
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -4580,9 +4582,10 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
- return ext4_indirect_trans_blocks(inode, nrblocks, 0);
- return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
+ return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
+ return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
}
+
/*
* Account for index blocks, block groups bitmaps and block group
* descriptor blocks if modify datablocks and index blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dfe17a13405..444ad998f72 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4441,6 +4441,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
else if (block >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
+ ext4_unlock_group(sb, group);
ext4_error(sb, __func__,
"Double free of blocks %d (%d %d)\n",
block, entry->start_blk, entry->count);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92db9e94514..63adcb79298 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1061,7 +1061,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child)
{
unsigned long ino;
- struct dentry *parent;
struct inode *inode;
static const struct qstr dotdot = {
.name = "..",
@@ -1083,16 +1082,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EIO);
}
- inode = ext4_iget(child->d_inode->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- parent = d_alloc_anon(inode);
- if (!parent) {
- iput(inode);
- parent = ERR_PTR(-ENOMEM);
- }
- return parent;
+ return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
}
#define S_SHIFT 12
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9b2b2bc4ec1..e4a241c65db 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -333,7 +333,8 @@ void ext4_abort(struct super_block *sb, const char *function,
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
}
void ext4_warning(struct super_block *sb, const char *function,
@@ -399,7 +400,7 @@ fail:
static int ext4_blkdev_put(struct block_device *bdev)
{
bd_release(bdev);
- return blkdev_put(bdev);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
}
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@ -442,14 +443,16 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- int i;
+ int i, err;
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- if (jbd2_journal_destroy(sbi->s_journal) < 0)
- ext4_abort(sb, __func__, "Couldn't clean up the journal");
+ err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
+ if (err < 0)
+ ext4_abort(sb, __func__, "Couldn't clean up the journal");
+
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -1455,9 +1458,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
/* We allocate both existing and potentially added groups */
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
- ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
- EXT4_DESC_PER_BLOCK_BITS(sb))) /
- groups_per_flex;
+ ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
+ EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
sbi->s_flex_groups = kzalloc(flex_group_count *
sizeof(struct flex_groups), GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
@@ -2553,7 +2555,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
if (bd_claim(bdev, sb)) {
printk(KERN_ERR
"EXT4: failed to claim external journal device.\n");
- blkdev_put(bdev);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
return NULL;
}
@@ -2882,12 +2884,9 @@ int ext4_force_commit(struct super_block *sb)
/*
* Ext4 always journals updates to the superblock itself, so we don't
* have to propagate any other updates to the superblock on disk at this
- * point. Just start an async writeback to get the buffers on their way
- * to the disk.
- *
- * This implicitly triggers the writebehind on sync().
+ * point. (We can probably nuke this function altogether, and remove
+ * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
*/
-
static void ext4_write_super(struct super_block *sb)
{
if (mutex_trylock(&sb->s_lock) != 0)
@@ -2897,15 +2896,15 @@ static void ext4_write_super(struct super_block *sb)
static int ext4_sync_fs(struct super_block *sb, int wait)
{
- tid_t target;
+ int ret = 0;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
sb->s_dirt = 0;
- if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
- if (wait)
- jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
- }
- return 0;
+ if (wait)
+ ret = ext4_force_commit(sb);
+ else
+ jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+ return ret;
}
/*
@@ -3328,30 +3327,30 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount)
+ char *name, int remount)
{
int err;
- struct nameidata nd;
+ struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* When remounting, no checks are needed and in fact, path is NULL */
+ /* When remounting, no checks are needed and in fact, name is NULL */
if (remount)
- return vfs_quota_on(sb, type, format_id, path, remount);
+ return vfs_quota_on(sb, type, format_id, name, remount);
- err = path_lookup(path, LOOKUP_FOLLOW, &nd);
+ err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
return err;
/* Quotafile not on the same filesystem? */
- if (nd.path.mnt->mnt_sb != sb) {
- path_put(&nd.path);
+ if (path.mnt->mnt_sb != sb) {
+ path_put(&path);
return -EXDEV;
}
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ if (path.dentry->d_parent != sb->s_root)
printk(KERN_WARNING
"EXT4-fs: Quota file not on filesystem root. "
"Journaled quota will not work.\n");
@@ -3361,7 +3360,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* When we journal data on quota file, we have to flush journal to see
* all updates to the file when we bypass pagecache...
*/
- if (ext4_should_journal_data(nd.path.dentry->d_inode)) {
+ if (ext4_should_journal_data(path.dentry->d_inode)) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
@@ -3370,13 +3369,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err) {
- path_put(&nd.path);
+ path_put(&path);
return err;
}
}
- err = vfs_quota_on_path(sb, type, format_id, &nd.path);
- path_put(&nd.path);
+ err = vfs_quota_on_path(sb, type, format_id, &path);
+ path_put(&path);
return err;
}