From f36f21ecca9ee688301174e5f2e0827827a7a7ff Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 12 May 2008 14:02:33 -0700
Subject: Fix misuses of bdevname()

bdevname() fills the buffer that it is given as a parameter, so calling
strcpy() or snprintf() on the returned value is redundant (and probably not
guaranteed to work - I don't think strcpy and snprintf support overlapping
buffers.)

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/mballoc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index fbec2ef9379..b128bdc0f55 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2639,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 	struct proc_dir_entry *proc;
 	char devname[64];
 
-	snprintf(devname, sizeof(devname) - 1, "%s",
-		bdevname(sb->s_bdev, devname));
+	bdevname(sb->s_bdev, devname);
 	sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
 
 	MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
@@ -2674,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 	if (sbi->s_mb_proc == NULL)
 		return -EINVAL;
 
-	snprintf(devname, sizeof(devname) - 1, "%s",
-		bdevname(sb->s_bdev, devname));
+	bdevname(sb->s_bdev, devname);
 	remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
 	remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
-- 
cgit v1.2.3


From 7e01c8e5420b6c7f9d85d34c15d8c7a15c9fc720 Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Wed, 14 May 2008 16:05:47 -0700
Subject: ext3/4: fix uninitialized bs in ext3/4_xattr_set_handle()

This fix the uninitialized bs when we try to replace a xattr entry in
ibody with the new value which require more than free space.

This situation only happens we format ext3/4 with inode size more than 128 and
we have put xattr entries both in ibody and block.  The consequences about
this bug is we will lost the xattr block which pointed by i_file_acl with all
xattr entires in it.  We will alloc a new xattr block and put that large value
entry in it.  The old xattr block will become orphan block.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Andreas Dilger <adilger@sun.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/xattr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3fbc2c6c3d0..ff08633f398 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1009,6 +1009,11 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 			i.value = NULL;
 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 		} else if (error == -ENOSPC) {
+			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
+				error = ext4_xattr_block_find(inode, &i, &bs);
+				if (error)
+					goto cleanup;
+			}
 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 			if (error)
 				goto cleanup;
-- 
cgit v1.2.3


From dfc5d03f12e706c19ee37734184ea96582ef931d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 May 2008 19:11:51 -0400
Subject: ext4: correct mount option parsing to detect when quota options can
 be changed

We should not allow user to change quota mount options when quota is
just suspended.  It would make mount options and internal quota state
inconsistent.  Also we should not allow user to change quota format when
quota is turned on.  On the other hand we can just silently ignore when
some option is set to the value it already has (mount does this on
remount).

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 52dd0679a4e..686ebcc2e6c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -979,7 +979,7 @@ static int parse_options (char *options, struct super_block *sb,
 	int data_opt = 0;
 	int option;
 #ifdef CONFIG_QUOTA
-	int qtype;
+	int qtype, qfmt;
 	char *qname;
 #endif
 
@@ -1162,7 +1162,9 @@ static int parse_options (char *options, struct super_block *sb,
 		case Opt_grpjquota:
 			qtype = GRPQUOTA;
 set_qf_name:
-			if (sb_any_quota_enabled(sb)) {
+			if ((sb_any_quota_enabled(sb) ||
+			     sb_any_quota_suspended(sb)) &&
+			    !sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR
 					"EXT4-fs: Cannot change journalled "
 					"quota options when quota turned on.\n");
@@ -1200,7 +1202,9 @@ set_qf_name:
 		case Opt_offgrpjquota:
 			qtype = GRPQUOTA;
 clear_qf_name:
-			if (sb_any_quota_enabled(sb)) {
+			if ((sb_any_quota_enabled(sb) ||
+			     sb_any_quota_suspended(sb)) &&
+			    sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR "EXT4-fs: Cannot change "
 					"journalled quota options when "
 					"quota turned on.\n");
@@ -1213,10 +1217,20 @@ clear_qf_name:
 			sbi->s_qf_names[qtype] = NULL;
 			break;
 		case Opt_jqfmt_vfsold:
-			sbi->s_jquota_fmt = QFMT_VFS_OLD;
-			break;
+			qfmt = QFMT_VFS_OLD;
+			goto set_qf_format;
 		case Opt_jqfmt_vfsv0:
-			sbi->s_jquota_fmt = QFMT_VFS_V0;
+			qfmt = QFMT_VFS_V0;
+set_qf_format:
+			if ((sb_any_quota_enabled(sb) ||
+			     sb_any_quota_suspended(sb)) &&
+			    sbi->s_jquota_fmt != qfmt) {
+				printk(KERN_ERR "EXT4-fs: Cannot change "
+					"journaled quota options when "
+					"quota turned on.\n");
+				return 0;
+			}
+			sbi->s_jquota_fmt = qfmt;
 			break;
 		case Opt_quota:
 		case Opt_usrquota:
-- 
cgit v1.2.3


From cd59e7b9781a35716b8a3e8c4aa2d48081d7daf7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 May 2008 19:11:51 -0400
Subject: ext4: Fix mount messages when quota disabled

When quota is disabled, we should not print 'journaled quota not
supported' when user tried to mount non-journaled quota. Also fix typo
in the message.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 686ebcc2e6c..94a527261ca 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1255,6 +1255,9 @@ set_qf_format:
 		case Opt_quota:
 		case Opt_usrquota:
 		case Opt_grpquota:
+			printk(KERN_ERR
+				"EXT4-fs: quota options not supported.\n");
+			break;
 		case Opt_usrjquota:
 		case Opt_grpjquota:
 		case Opt_offusrjquota:
@@ -1262,7 +1265,7 @@ set_qf_format:
 		case Opt_jqfmt_vfsold:
 		case Opt_jqfmt_vfsv0:
 			printk(KERN_ERR
-				"EXT4-fs: journalled quota options not "
+				"EXT4-fs: journaled quota options not "
 				"supported.\n");
 			break;
 		case Opt_noquota:
-- 
cgit v1.2.3


From 0623543b3335c8e439cacf21af99bbf45da42c5a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 May 2008 19:11:51 -0400
Subject: ext4: fix synchronization of quota files in journal=data mode

In journal=data mode, it is not enough to do write_inode_now as done in
vfs_quota_on() to write all data to their final location (which is
needed for quota_read to work correctly).  Calling journal_flush() does
its job.

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 94a527261ca..cddf7f0e0fd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3170,23 +3170,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 	if (!test_opt(sb, QUOTA))
 		return -EINVAL;
-	/* Not journalling quota? */
-	if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] &&
-	    !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
+	/* When remounting, no checks are needed and in fact, path is NULL */
+	if (remount)
 		return vfs_quota_on(sb, type, format_id, path, remount);
+
 	err = path_lookup(path, LOOKUP_FOLLOW, &nd);
 	if (err)
 		return err;
+
 	/* Quotafile not on the same filesystem? */
 	if (nd.path.mnt->mnt_sb != sb) {
 		path_put(&nd.path);
 		return -EXDEV;
 	}
-	/* Quotafile not of fs root? */
-	if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-		printk(KERN_WARNING
-			"EXT4-fs: Quota file not on filesystem root. "
-			"Journalled quota will not work.\n");
+	/* Journaling quota? */
+	if (EXT4_SB(sb)->s_qf_names[type]) {
+		/* Quotafile not of fs root? */
+		if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+			printk(KERN_WARNING
+				"EXT4-fs: Quota file not on filesystem root. "
+				"Journaled quota will not work.\n");
+ 	}
+
+	/*
+	 * When we journal data on quota file, we have to flush journal to see
+	 * all updates to the file when we bypass pagecache...
+	 */
+	if (ext4_should_journal_data(nd.path.dentry->d_inode)) {
+		/*
+		 * We don't need to lock updates but journal_flush() could
+		 * otherwise be livelocked...
+		 */
+		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+		jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+	}
+
 	path_put(&nd.path);
 	return vfs_quota_on(sb, type, format_id, path, remount);
 }
-- 
cgit v1.2.3


From 2c8be6b222f76c332d9faeb00c047996d340632c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 May 2008 21:27:55 -0400
Subject: ext4: fix typos in messages and comments (journalled -> journaled)

Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cddf7f0e0fd..09d9359c805 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1166,7 +1166,7 @@ set_qf_name:
 			     sb_any_quota_suspended(sb)) &&
 			    !sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR
-					"EXT4-fs: Cannot change journalled "
+					"EXT4-fs: Cannot change journaled "
 					"quota options when quota turned on.\n");
 				return 0;
 			}
@@ -1206,7 +1206,7 @@ clear_qf_name:
 			     sb_any_quota_suspended(sb)) &&
 			    sbi->s_qf_names[qtype]) {
 				printk(KERN_ERR "EXT4-fs: Cannot change "
-					"journalled quota options when "
+					"journaled quota options when "
 					"quota turned on.\n");
 				return 0;
 			}
@@ -1350,14 +1350,14 @@ set_qf_format:
 		}
 
 		if (!sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journalled quota format "
+			printk(KERN_ERR "EXT4-fs: journaled quota format "
 					"not specified.\n");
 			return 0;
 		}
 	} else {
 		if (sbi->s_jquota_fmt) {
-			printk(KERN_ERR "EXT4-fs: journalled quota format "
-					"specified with no journalling "
+			printk(KERN_ERR "EXT4-fs: journaled quota format "
+					"specified with no journaling "
 					"enabled.\n");
 			return 0;
 		}
@@ -1598,7 +1598,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
 			int ret = ext4_quota_on_mount(sb, i);
 			if (ret < 0)
 				printk(KERN_ERR
-					"EXT4-fs: Cannot turn on journalled "
+					"EXT4-fs: Cannot turn on journaled "
 					"quota: error %d\n", ret);
 		}
 	}
@@ -3123,7 +3123,7 @@ static int ext4_release_dquot(struct dquot *dquot)
 
 static int ext4_mark_dquot_dirty(struct dquot *dquot)
 {
-	/* Are we journalling quotas? */
+	/* Are we journaling quotas? */
 	if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
 	    EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
 		dquot_mark_dquot_dirty(dquot);
-- 
cgit v1.2.3


From 1930479c4b6bbcb6f164a5b3498e0d98329967f4 Mon Sep 17 00:00:00 2001
From: Valerie Clement <valerie.clement@bull.net>
Date: Tue, 13 May 2008 19:31:14 -0400
Subject: ext4: mballoc fix mb_normalize_request algorithm for 1KB block size
 filesystems

In case of inode preallocation, the number of blocks to allocate depends
on the file size and it is calculated in ext4_mb_normalize_request().
Each group in the filesystem is then checked to find one that can be
used for allocation; this is done in ext4_mb_good_group().

When a file bigger than 4MB is created, the requested number of blocks
to preallocate, calculated by ext4_mb_normalize_request is 4096.
However for a filesystem with 1KB block size, the maximum size of the
block buddies used by the multiblock allocator is 2048, so none of
groups in the filesystem satisfies the search criteria in
ext4_mb_good_group(). Scanning all the filesystem groups impacts
performance.

This was demonstrated by using a freshly created, 70GB, 1k block
filesystem, with caches dropped write before the test via
/proc/sys/vm/drop_caches, and with the filesystem mounted with
nodelalloc and nodealloc,nomballoc.  The time to write an 8 megabyte
file using "dd if=/dev/zero of=/mnt/test/fo bs=8k count=1k conv=fsync"
took 35.5091 seconds (236kB/s) with nodellaloc, and 0.233754 seconds
(35.9 MB/s) with the nodelloc,nomballoc options.  With a 1TB partition,
it took several minutes to write 8MB!

This patch modifies the algorithm in ext4_mb_normalize_group_request to
calculate the number of blocks to allocate by taking into account the
maximum size of free blocks chunks handled by the multiblock allocator.

It has also been tested for filesystems with 2KB and 4KB block sizes to
ensure that those cases don't regress.

Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Valerie Clement <valerie.clement@bull.net>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b128bdc0f55..1d7fde99452 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2880,12 +2880,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	if (size < i_size_read(ac->ac_inode))
 		size = i_size_read(ac->ac_inode);
 
-	/* max available blocks in a free group */
-	max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 -
-				EXT4_SB(ac->ac_sb)->s_itb_per_group;
+	/* max size of free chunks */
+	max = 2 << bsbits;
 
-#define NRL_CHECK_SIZE(req, size, max,bits)	\
-		(req <= (size) || max <= ((size) >> bits))
+#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
+		(req <= (size) || max <= (chunk_size))
 
 	/* first, try to predict filesize */
 	/* XXX: should this table be tunable? */
@@ -2904,16 +2903,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 		size = 512 * 1024;
 	} else if (size <= 1024 * 1024) {
 		size = 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) {
+	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
 		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-						(20 - bsbits)) << 20;
-		size = 1024 * 1024;
-	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) {
+						(21 - bsbits)) << 21;
+		size = 2 * 1024 * 1024;
+	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
 		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
 							(22 - bsbits)) << 22;
 		size = 4 * 1024 * 1024;
 	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
-					(8<<20)>>bsbits, max, bsbits)) {
+					(8<<20)>>bsbits, max, 8 * 1024)) {
 		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
 							(23 - bsbits)) << 23;
 		size = 8 * 1024 * 1024;
-- 
cgit v1.2.3


From 519deca0496a4df07d15acf3181ca5d573bffdec Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 15 May 2008 14:43:20 -0400
Subject: ext4: Retry block allocation if new blocks are allocated from system
 zone.

If the block allocator gets blocks out of system zone ext4 calls
ext4_error. But if the file system is mounted with errors=continue
retry block allocation. We need to mark the system zone blocks as
in use to make sure retry don't pick them again

System zone is the block range mapping block bitmap, inode bitmap and inode
table.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/balloc.c  | 17 +++++++++++------
 fs/ext4/mballoc.c | 47 ++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 47 insertions(+), 17 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index da994374ec3..30494c5da84 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -287,11 +287,11 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 			    (int)block_group, (unsigned long long)bitmap_blk);
 		return NULL;
 	}
-	if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) {
-		put_bh(bh);
-		return NULL;
-	}
-
+	ext4_valid_block_bitmap(sb, desc, block_group, bh);
+	/*
+	 * file system mounted not to panic on error,
+	 * continue with corrupt bitmap
+	 */
 	return bh;
 }
 /*
@@ -1770,7 +1770,12 @@ allocated:
 			    "Allocating block in system zone - "
 			    "blocks from %llu, length %lu",
 			     ret_block, num);
-		goto out;
+		/*
+		 * claim_block marked the blocks we allocated
+		 * as in use. So we may want to selectively
+		 * mark some of the blocks as free
+		 */
+		goto retry_alloc;
 	}
 
 	performed_allocation = 1;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d7fde99452..873ad9b3418 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2736,7 +2736,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	struct ext4_sb_info *sbi;
 	struct super_block *sb;
 	ext4_fsblk_t block;
-	int err;
+	int err, len;
 
 	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
 	BUG_ON(ac->ac_b_ex.fe_len <= 0);
@@ -2770,14 +2770,27 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		+ ac->ac_b_ex.fe_start
 		+ le32_to_cpu(es->s_first_data_block);
 
-	if (block == ext4_block_bitmap(sb, gdp) ||
-			block == ext4_inode_bitmap(sb, gdp) ||
-			in_range(block, ext4_inode_table(sb, gdp),
-				EXT4_SB(sb)->s_itb_per_group)) {
-
+	len = ac->ac_b_ex.fe_len;
+	if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
+	    in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
+	    in_range(block, ext4_inode_table(sb, gdp),
+		     EXT4_SB(sb)->s_itb_per_group) ||
+	    in_range(block + len - 1, ext4_inode_table(sb, gdp),
+		     EXT4_SB(sb)->s_itb_per_group)) {
 		ext4_error(sb, __func__,
 			   "Allocating block in system zone - block = %llu",
 			   block);
+		/* File system mounted not to panic on error
+		 * Fix the bitmap and repeat the block allocation
+		 * We leak some of the blocks here.
+		 */
+		mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
+				bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+				ac->ac_b_ex.fe_len);
+		err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+		if (!err)
+			err = -EAGAIN;
+		goto out_err;
 	}
 #ifdef AGGRESSIVE_CHECK
 	{
@@ -4032,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 
 		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
 		ext4_mb_normalize_request(ac, ar);
-
 repeat:
 		/* allocate space in core */
 		ext4_mb_regular_allocator(ac);
@@ -4046,10 +4058,21 @@ repeat:
 	}
 
 	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-		ext4_mb_mark_diskspace_used(ac, handle);
-		*errp = 0;
-		block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-		ar->len = ac->ac_b_ex.fe_len;
+		*errp = ext4_mb_mark_diskspace_used(ac, handle);
+		if (*errp ==  -EAGAIN) {
+			ac->ac_b_ex.fe_group = 0;
+			ac->ac_b_ex.fe_start = 0;
+			ac->ac_b_ex.fe_len = 0;
+			ac->ac_status = AC_STATUS_CONTINUE;
+			goto repeat;
+		} else if (*errp) {
+			ac->ac_b_ex.fe_len = 0;
+			ar->len = 0;
+			ext4_mb_show_ac(ac);
+		} else {
+			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+			ar->len = ac->ac_b_ex.fe_len;
+		}
 	} else {
 		freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
 		if (freed)
@@ -4236,6 +4259,8 @@ do_more:
 		ext4_error(sb, __func__,
 			   "Freeing blocks in system zone - "
 			   "Block = %lu, count = %lu", block, count);
+		/* err = 0. ext4_std_error should be a no op */
+		goto error_return;
 	}
 
 	BUFFER_TRACE(bitmap_bh, "getting write access");
-- 
cgit v1.2.3


From 03cddb80ed2dacaf03c370d38bcc75f8303a03b8 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 5 Jun 2008 20:59:29 -0400
Subject: ext4: Fix use of uninitialized data with debug enabled.

Fix use of uninitialized data with debug enabled.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 873ad9b3418..c9900aade15 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	sbi = EXT4_SB(sb);
 	es = sbi->s_es;
 
-	ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
-			gdp->bg_free_blocks_count);
 
 	err = -EIO;
 	bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	if (!gdp)
 		goto out_err;
 
+	ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
+			gdp->bg_free_blocks_count);
+
 	err = ext4_journal_get_write_access(handle, gdp_bh);
 	if (err)
 		goto out_err;
@@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 				struct ext4_prealloc_space *pa)
 {
-	unsigned len = ac->ac_o_ex.fe_len;
-
+	unsigned int len = ac->ac_o_ex.fe_len;
 	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
 					&ac->ac_b_ex.fe_group,
 					&ac->ac_b_ex.fe_start);
-- 
cgit v1.2.3


From 0bf7e8379ce7e0159a2a6bd3d937f2f6ada79799 Mon Sep 17 00:00:00 2001
From: "Jose R. Santos" <jrs@us.ibm.com>
Date: Tue, 3 Jun 2008 14:07:29 -0400
Subject: ext4: Fix uninit block group initialization with FLEX_BG

With FLEX_BG block bitmaps, inode bitmaps and inode tables _MAY_ be
allocated outside the group.  So, when initializing an uninitialized
block bitmap, we need to check the location of this blocks before
setting the corresponding bits in the block bitmap of the newly
initialized group.  Also return the right number of free blocks when
counting the available free blocks in uninit group.

Tested-by: Aneesh Kumar K.V <aneesh.kumar@inux.vnet.ibm.com>
Signed-off-by: Jose R. Santos <jrs@us.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/balloc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 9 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 30494c5da84..9cc80b9cc8d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 
 }
 
+static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
+			ext4_group_t block_group)
+{
+	ext4_group_t actual_group;
+	ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
+	if (actual_group == block_group)
+		return 1;
+	return 0;
+}
+
+static int ext4_group_used_meta_blocks(struct super_block *sb,
+				ext4_group_t block_group)
+{
+	ext4_fsblk_t tmp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	/* block bitmap, inode bitmap, and inode table blocks */
+	int used_blocks = sbi->s_itb_per_group + 2;
+
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+		struct ext4_group_desc *gdp;
+		struct buffer_head *bh;
+
+		gdp = ext4_get_group_desc(sb, block_group, &bh);
+		if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
+					block_group))
+			used_blocks--;
+
+		if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
+					block_group))
+			used_blocks--;
+
+		tmp = ext4_inode_table(sb, gdp);
+		for (; tmp < ext4_inode_table(sb, gdp) +
+				sbi->s_itb_per_group; tmp++) {
+			if (!ext4_block_in_group(sb, tmp, block_group))
+				used_blocks -= 1;
+		}
+	}
+	return used_blocks;
+}
 /* Initializes an uninitialized block bitmap if given, and returns the
  * number of blocks free in the group. */
 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 	free_blocks = group_blocks - bit_max;
 
 	if (bh) {
-		ext4_fsblk_t start;
+		ext4_fsblk_t start, tmp;
+		int flex_bg = 0;
 
 		for (bit = 0; bit < bit_max; bit++)
 			ext4_set_bit(bit, bh->b_data);
 
 		start = ext4_group_first_block_no(sb, block_group);
 
-		/* Set bits for block and inode bitmaps, and inode table */
-		ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
-		ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
-		for (bit = (ext4_inode_table(sb, gdp) - start),
-		     bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
-			ext4_set_bit(bit, bh->b_data);
+		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+					      EXT4_FEATURE_INCOMPAT_FLEX_BG))
+			flex_bg = 1;
 
+		/* Set bits for block and inode bitmaps, and inode table */
+		tmp = ext4_block_bitmap(sb, gdp);
+		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+			ext4_set_bit(tmp - start, bh->b_data);
+
+		tmp = ext4_inode_bitmap(sb, gdp);
+		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+			ext4_set_bit(tmp - start, bh->b_data);
+
+		tmp = ext4_inode_table(sb, gdp);
+		for (; tmp < ext4_inode_table(sb, gdp) +
+				sbi->s_itb_per_group; tmp++) {
+			if (!flex_bg ||
+				ext4_block_in_group(sb, tmp, block_group))
+				ext4_set_bit(tmp - start, bh->b_data);
+		}
 		/*
 		 * Also if the number of blocks within the group is
 		 * less than the blocksize * 8 ( which is the size
@@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 */
 		mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
 	}
-
-	return free_blocks - sbi->s_itb_per_group - 2;
+	return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
 }
 
 
-- 
cgit v1.2.3


From 944600930a37aa725ba6f93c3244e2d77a1e3581 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@redhat.com>
Date: Fri, 6 Jun 2008 18:05:52 -0400
Subject: ext4: fix online resize bug

There is a bug when we are trying to verify that the reserve inode's
double indirect blocks point back to the primary gdt blocks.  The fix is
obvious, we need to mod the gdb count by the addr's per block.  This was
verified using the same testcase as with the ext3 equivalent of this
patch.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/resize.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9f086a6a472..9ecb92f6854 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
-	data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count;
+	data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
+					 EXT4_ADDR_PER_BLOCK(sb));
 	end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
 
 	/* Get each reserved primary GDT block and verify it holds backups */
-- 
cgit v1.2.3


From 624080eded68738daee041ad64672a9d2614754f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 6 Jun 2008 17:50:40 -0400
Subject: jbd2: If a journal checksum error is detected, propagate the error to
 ext4

If a journal checksum error is detected, the ext4 filesystem will call
ext4_error(), and the mount will either continue, become a read-only
mount, or cause a kernel panic based on the superblock flags
indicating the user's preference of what to do in case of filesystem
corruption being detected.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 09d9359c805..d01a32e8b50 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2189,6 +2189,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
 		if (ext4_load_journal(sb, es, journal_devnum))
 			goto failed_mount3;
+		if (!(sb->s_flags & MS_RDONLY) &&
+		    EXT4_SB(sb)->s_journal->j_failed_commit) {
+			printk(KERN_CRIT "EXT4-fs error (device %s): "
+			       "ext4_fill_super: Journal transaction "
+			       "%u is corrupt\n", sb->s_id, 
+			       EXT4_SB(sb)->s_journal->j_failed_commit);
+			if (test_opt (sb, ERRORS_RO)) {
+				printk (KERN_CRIT
+					"Mounting filesystem read-only\n");
+				sb->s_flags |= MS_RDONLY;
+				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+			}
+			if (test_opt(sb, ERRORS_PANIC)) {
+				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+				ext4_commit_super(sb, es, 1);
+				printk(KERN_CRIT
+				       "EXT4-fs (device %s): mount failed\n",
+				      sb->s_id);
+				goto failed_mount4;
+			}
+		}
 	} else if (journal_inum) {
 		if (ext4_create_journal(sb, es, journal_inum))
 			goto failed_mount3;
-- 
cgit v1.2.3


From cd0b6a39a1d68b61b1073662f40f747c8b728f98 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 26 May 2008 10:28:28 -0400
Subject: ext4: Display the journal_async_commit mount option in /proc/mounts

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Girish Shilamkar <girish@clusterfs.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d01a32e8b50..ba92c606ad9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -731,6 +731,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	}
 	if (test_opt(sb, BARRIER))
 		seq_puts(seq, ",barrier=1");
+	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
+		seq_puts(seq, ",journal_async_commit");
 	if (test_opt(sb, NOBH))
 		seq_puts(seq, ",nobh");
 	if (!test_opt(sb, EXTENTS))
-- 
cgit v1.2.3


From 571640cad3fda6475da45d91cf86076f1f86bd9b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 26 May 2008 12:29:46 -0400
Subject: ext4: enable barriers by default

I can't think of any valid reason for ext4 to not use barriers when
they are available;  I believe this is necessary for filesystem
integrity in the face of a volatile write cache on storage.

An administrator who trusts that the cache is sufficiently battery-
backed (and power supplies are sufficiently redundant, etc...)
can always turn it back off again.

SuSE has carried such a patch for ext3 for quite some time now.

Also document the mount option while we're at it.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba92c606ad9..cb96f127c36 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	unsigned long def_mount_opts;
 	struct super_block *sb = vfs->mnt_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	journal_t *journal = sbi->s_journal;
 	struct ext4_super_block *es = sbi->s_es;
 
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -729,8 +730,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_printf(seq, ",commit=%u",
 			   (unsigned) (sbi->s_commit_interval / HZ));
 	}
-	if (test_opt(sb, BARRIER))
-		seq_puts(seq, ",barrier=1");
+	/*
+	 * We're changing the default of barrier mount option, so
+	 * let's always display its mount state so it's clear what its
+	 * status is.
+	 */
+	seq_puts(seq, ",barrier=");
+	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
 		seq_puts(seq, ",journal_async_commit");
 	if (test_opt(sb, NOBH))
@@ -1909,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
 
 	set_opt(sbi->s_mount_opt, RESERVATION);
+	set_opt(sbi->s_mount_opt, BARRIER);
 
 	/*
 	 * turn on extents feature by default in ext4 filesystem
-- 
cgit v1.2.3