From 09010978345e8883003bf411bb99753710eb5a3a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 20 May 2009 10:48:47 +0100 Subject: GFS2: Improve resource group error handling This patch improves the error handling in the case where we discover that the summary information in the resource group doesn't match the bitmap information while in the process of allocating blocks. Originally this resulted in a kernel bug, but this patch changes that so that we return -EIO and print some messages explaining what went wrong, and how to fix it. We also remember locally not to try and allocate from the same rgrp again, so that a subsequent allocation in a different rgrp should succeed. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 58 +++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 15 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa..fbacf09ee34 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -701,10 +701,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) u32 rg_flags; rg_flags = be32_to_cpu(str->rg_flags); - if (rg_flags & GFS2_RGF_NOALLOC) - rgd->rd_flags |= GFS2_RDF_NOALLOC; - else - rgd->rd_flags &= ~GFS2_RDF_NOALLOC; + rg_flags &= ~GFS2_RDF_MASK; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -713,11 +710,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { struct gfs2_rgrp *str = buf; - u32 rg_flags = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) - rg_flags |= GFS2_RGF_NOALLOC; - str->rg_flags = cpu_to_be32(rg_flags); + str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); str->rg_free = cpu_to_be32(rgd->rd_free); str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); str->__pad = cpu_to_be32(0); @@ -942,7 +936,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) struct gfs2_sbd *sdp = rgd->rd_sbd; int ret = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) + if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; spin_lock(&sdp->sd_rindex_spin); @@ -1435,13 +1429,33 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, } /** - * gfs2_alloc_block - Allocate a block + * gfs2_rgrp_dump - print out an rgrp + * @seq: The iterator + * @gl: The glock in question + * + */ + +int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) +{ + const struct gfs2_rgrpd *rgd = gl->gl_object; + if (rgd == NULL) + return 0; + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + (unsigned long long)rgd->rd_addr, rgd->rd_flags, + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + return 0; +} + +/** + * gfs2_alloc_block - Allocate one or more blocks * @ip: the inode to allocate the block for + * @bn: Used to return the starting block number + * @n: requested number of blocks/extent length (value/result) * - * Returns: the allocated block + * Returns: 0 or error */ -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) +int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -1457,7 +1471,10 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) goal = rgd->rd_last_alloc; blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); - BUG_ON(blk == BFITNOENT); + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (blk == BFITNOENT) + goto rgrp_error; rgd->rd_last_alloc = blk; block = rgd->rd_data0 + blk; @@ -1469,7 +1486,9 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); brelse(dibh); } - gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); + if (rgd->rd_free < *n) + goto rgrp_error; + rgd->rd_free -= *n; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1484,7 +1503,16 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - return block; + *bn = block; + return 0; + +rgrp_error: + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", + (unsigned long long)rgd->rd_addr); + fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); + gfs2_rgrp_dump(NULL, rgd->rd_gl); + rgd->rd_flags |= GFS2_RDF_ERROR; + return -EIO; } /** -- cgit v1.2.3 From 60a0b8f93664621a07b93273fc8ebc29590c62f5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 12:23:12 +0100 Subject: GFS2: Add a rgrp bitmap full flag During block allocation, it is useful to know if sections of disk are full on a finer grained basis than a single resource group. This can make a performance difference when resource groups have larger numbers of bitmap blocks, since we no longer have to search them all block by block in each individual bitmap. The full flag is set on a per-bitmap basis when it has been searched and found to have no free space. It is then skipped in subsequent searches until the flag is reset. The resetting occurs if we have to drop the glock on the resource group for any reason, or if we deallocate some blocks within that resource group and thus free up some space. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 77 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 30 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fbacf09ee34..23637b9d1c7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -442,6 +442,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; + bi->bi_flags = 0; /* small rgrp; bitmap stored completely in header block */ if (length == 1) { bytes = bytes_left; @@ -769,6 +770,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { + for (x = 0; x < length; x++) + clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); rgd->rd_flags |= GFS2_RDF_UPTODATE; } @@ -897,6 +900,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) continue; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); + clear_bit(GBF_FULL, &bi->bi_flags); memcpy(bi->bi_clone + bi->bi_offset, bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } @@ -1309,30 +1313,37 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, { struct gfs2_bitmap *bi = NULL; const u32 length = rgd->rd_length; - u32 blk = 0; + u32 blk = BFITNOENT; unsigned int buf, x; const unsigned int elen = *n; - const u8 *buffer; + const u8 *buffer = NULL; *n = 0; /* Find bitmap block that contains bits for goal block */ for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + /* Convert scope of "goal" from rgrp-wide to within found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } } + buf = 0; + goal = 0; - gfs2_assert(rgd->rd_sbd, buf < length); - - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - goal -= bi->bi_start * GFS2_NBBY; - +do_search: /* Search (up to entire) bitmap in this rgrp for allocatable block. "x <= length", instead of "x < length", because we typically start the search in the middle of a bit block, but if we can't find an allocatable block anywhere else, we want to be able wrap around and search in the first part of our first-searched bit block. */ for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags) && + (old_state == GFS2_BLKST_FREE)) + goto skip; + /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1343,33 +1354,39 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, if (blk != BFITNOENT) break; + if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) + set_bit(GBF_FULL, &bi->bi_flags); + /* Try next bitmap block (wrap back to rgrp header if at end) */ - buf = (buf + 1) % length; - bi = rgd->rd_bits + buf; +skip: + buf++; + buf %= length; goal = 0; } - if (blk != BFITNOENT && old_state != new_state) { - *n = 1; - gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + if (blk == BFITNOENT) + return blk; + *n = 1; + if (old_state == new_state) + goto out; + + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, + bi->bi_len, blk, new_state); + goal = blk; + while (*n < elen) { + goal++; + if (goal >= (bi->bi_len * GFS2_NBBY)) + break; + if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != + GFS2_BLKST_FREE) + break; gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, blk, new_state); - goal = blk; - while (*n < elen) { - goal++; - if (goal >= (bi->bi_len * GFS2_NBBY)) - break; - if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != - GFS2_BLKST_FREE) - break; - gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, - bi->bi_offset, bi->bi_len, goal, - new_state); - (*n)++; - } + bi->bi_len, goal, new_state); + (*n)++; } - - return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; +out: + return (bi->bi_start * GFS2_NBBY) + blk; } /** -- cgit v1.2.3 From 1ce97e564b628bee30b8dbb64e5e653a484308f6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 15:18:19 +0100 Subject: GFS2: Be more aggressive in reclaiming unlinked inodes This patch increases the frequency with which gfs2 looks for unlinked, but still allocated inodes. Its the equivalent operation to ext3's orphan list, but done with bitmaps in the resource groups. This also fixes a bug where a field in the rgrp was too small. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 23637b9d1c7..ee3d5c1876a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -581,7 +581,6 @@ static int read_rindex_entry(struct gfs2_inode *ip, rgd->rd_gl->gl_object = rgd; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; - rgd->rd_flags |= GFS2_RDF_CHECK; return error; } @@ -703,6 +702,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) rg_flags = be32_to_cpu(str->rg_flags); rg_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= rg_flags; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -773,7 +774,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgd->rd_flags |= GFS2_RDF_UPTODATE; + rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); } spin_lock(&sdp->sd_rindex_spin); -- cgit v1.2.3 From b1e71b0622974953e46a284aa986504a90869a9b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 22 May 2009 10:01:55 +0100 Subject: GFS2: Clean up some file names This patch renames the ops_*.c files which have no counterpart without the ops_ prefix in order to shorten the name and make it more readable. In addition, ops_address.h (which was very small) is moved into inode.h and inode.h is cleaned up by adding extern where required. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ee3d5c1876a..6122c7ee364 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -29,7 +29,6 @@ #include "util.h" #include "log.h" #include "inode.h" -#include "ops_address.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -- cgit v1.2.3 From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:49 -0400 Subject: block: Do away with the notion of hardsect_size Until now we have had a 1:1 mapping between storage device physical block size and the logical block sized used when addressing the device. With SATA 4KB drives coming out that will no longer be the case. The sector size will be 4KB but the logical block size will remain 512-bytes. Hence we need to distinguish between the physical block size and the logical ditto. This patch renames hardsect_size to logical_block_size. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2/rgrp.c') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa..a971d24e10c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -845,7 +845,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct super_block *sb = sdp->sd_vfs; struct block_device *bdev = sb->s_bdev; const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / - bdev_hardsect_size(sb->s_bdev); + bdev_logical_block_size(sb->s_bdev); u64 blk; sector_t start = 0; sector_t nr_sects = 0; -- cgit v1.2.3