aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/fuse/file.c10
-rw-r--r--fs/jfs/inode.c16
-rw-r--r--fs/jfs/jfs_inode.h1
-rw-r--r--fs/jfs/super.c118
-rw-r--r--fs/locks.c6
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c1
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c43
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/suballoc.c261
-rw-r--r--fs/ocfs2/suballoc.h2
-rw-r--r--fs/ocfs2/super.c8
-rw-r--r--fs/udf/super.c7
-rw-r--r--fs/xfs/xfs_alloc.c103
15 files changed, 457 insertions, 131 deletions
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index ba1c88af49f..82011019494 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
if (adfs_checkmap(sb, dm))
return dm;
- adfs_error(sb, NULL, "map corrupted");
+ adfs_error(sb, "map corrupted");
error_free:
while (--zone >= 0)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 63614ed1633..5c4fcd1dbf5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_readpages_data data;
int err;
+ err = -EIO;
if (is_bad_inode(inode))
- return -EIO;
+ goto clean_pages_up;
data.file = file;
data.inode = inode;
data.req = fuse_get_req(fc);
+ err = PTR_ERR(data.req);
if (IS_ERR(data.req))
- return PTR_ERR(data.req);
+ goto clean_pages_up;
err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
if (!err) {
@@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
fuse_put_request(fc, data.req);
}
return err;
+
+clean_pages_up:
+ put_pages_list(pages);
+ return err;
}
static size_t fuse_send_write(struct fuse_req *req, struct file *file,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 43e3f566aad..a223cf4faa9 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode)
set_cflag(COMMIT_Dirty, inode);
}
-static int
-jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
- struct buffer_head *bh_result, int create)
+int jfs_get_block(struct inode *ip, sector_t lblock,
+ struct buffer_head *bh_result, int create)
{
s64 lblock64 = lblock;
int rc = 0;
xad_t xad;
s64 xaddr;
int xflag;
- s32 xlen = max_blocks;
+ s32 xlen = bh_result->b_size >> ip->i_blkbits;
/*
* Take appropriate lock on inode
@@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
IREAD_LOCK(ip);
if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
- (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+ (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
xaddr) {
if (xflag & XAD_NOTRECORDED) {
if (!create)
@@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
return rc;
}
-static int jfs_get_block(struct inode *ip, sector_t lblock,
- struct buffer_head *bh_result, int create)
-{
- return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits,
- bh_result, create);
-}
-
static int jfs_writepage(struct page *page, struct writeback_control *wbc)
{
return nobh_writepage(page, jfs_get_block, wbc);
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index b5c7da6190d..1fc48df670c 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t);
extern void jfs_free_zero_link(struct inode *);
extern struct dentry *jfs_get_parent(struct dentry *dentry);
extern void jfs_set_inode_flags(struct inode *);
+extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern const struct address_space_operations jfs_aops;
extern struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4f6cfebc82d..143bcd1d5ea 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -26,6 +26,7 @@
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/posix_acl.h>
+#include <linux/buffer_head.h>
#include <asm/uaccess.h>
#include <linux/seq_file.h>
@@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
break;
}
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
case Opt_quota:
case Opt_usrquota:
*flag |= JFS_USRQUOTA;
@@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (sbi->flag & JFS_NOINTEGRITY)
seq_puts(seq, ",nointegrity");
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
if (sbi->flag & JFS_USRQUOTA)
seq_puts(seq, ",usrquota");
@@ -608,6 +609,113 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
return 0;
}
+#ifdef CONFIG_QUOTA
+
+/* Read data from quotafile - avoid pagecache and such because we cannot afford
+ * acquiring the locks... As quota files are never truncated and quota code
+ * itself serializes the operations (and noone else should touch the files)
+ * we don't have to be afraid of races */
+static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ sector_t blk = off >> sb->s_blocksize_bits;
+ int err = 0;
+ int offset = off & (sb->s_blocksize - 1);
+ int tocopy;
+ size_t toread;
+ struct buffer_head tmp_bh;
+ struct buffer_head *bh;
+ loff_t i_size = i_size_read(inode);
+
+ if (off > i_size)
+ return 0;
+ if (off+len > i_size)
+ len = i_size-off;
+ toread = len;
+ while (toread > 0) {
+ tocopy = sb->s_blocksize - offset < toread ?
+ sb->s_blocksize - offset : toread;
+
+ tmp_bh.b_state = 0;
+ tmp_bh.b_size = 1 << inode->i_blkbits;
+ err = jfs_get_block(inode, blk, &tmp_bh, 0);
+ if (err)
+ return err;
+ if (!buffer_mapped(&tmp_bh)) /* A hole? */
+ memset(data, 0, tocopy);
+ else {
+ bh = sb_bread(sb, tmp_bh.b_blocknr);
+ if (!bh)
+ return -EIO;
+ memcpy(data, bh->b_data+offset, tocopy);
+ brelse(bh);
+ }
+ offset = 0;
+ toread -= tocopy;
+ data += tocopy;
+ blk++;
+ }
+ return len;
+}
+
+/* Write to quotafile */
+static ssize_t jfs_quota_write(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ sector_t blk = off >> sb->s_blocksize_bits;
+ int err = 0;
+ int offset = off & (sb->s_blocksize - 1);
+ int tocopy;
+ size_t towrite = len;
+ struct buffer_head tmp_bh;
+ struct buffer_head *bh;
+
+ mutex_lock(&inode->i_mutex);
+ while (towrite > 0) {
+ tocopy = sb->s_blocksize - offset < towrite ?
+ sb->s_blocksize - offset : towrite;
+
+ tmp_bh.b_state = 0;
+ tmp_bh.b_size = 1 << inode->i_blkbits;
+ err = jfs_get_block(inode, blk, &tmp_bh, 1);
+ if (err)
+ goto out;
+ if (offset || tocopy != sb->s_blocksize)
+ bh = sb_bread(sb, tmp_bh.b_blocknr);
+ else
+ bh = sb_getblk(sb, tmp_bh.b_blocknr);
+ if (!bh) {
+ err = -EIO;
+ goto out;
+ }
+ lock_buffer(bh);
+ memcpy(bh->b_data+offset, data, tocopy);
+ flush_dcache_page(bh->b_page);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ brelse(bh);
+ offset = 0;
+ towrite -= tocopy;
+ data += tocopy;
+ blk++;
+ }
+out:
+ if (len == towrite)
+ return err;
+ if (inode->i_size < off+len-towrite)
+ i_size_write(inode, off+len-towrite);
+ inode->i_version++;
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ mutex_unlock(&inode->i_mutex);
+ return len - towrite;
+}
+
+#endif
+
static struct super_operations jfs_super_operations = {
.alloc_inode = jfs_alloc_inode,
.destroy_inode = jfs_destroy_inode,
@@ -621,7 +729,11 @@ static struct super_operations jfs_super_operations = {
.unlockfs = jfs_unlockfs,
.statfs = jfs_statfs,
.remount_fs = jfs_remount,
- .show_options = jfs_show_options
+ .show_options = jfs_show_options,
+#ifdef CONFIG_QUOTA
+ .quota_read = jfs_quota_read,
+ .quota_write = jfs_quota_write,
+#endif
};
static struct export_operations jfs_export_operations = {
diff --git a/fs/locks.c b/fs/locks.c
index b0b41a64e10..d7c53392cac 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1421,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
if (!leases_enable)
goto out;
- error = lease_alloc(filp, arg, &fl);
- if (error)
+ error = -ENOMEM;
+ fl = locks_alloc_lock();
+ if (fl == NULL)
goto out;
locks_copy_lock(fl, lease);
@@ -1430,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
locks_insert_lock(before, fl);
*flp = fl;
+ error = 0;
out:
return error;
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 1b8346dd057..9503240ef0e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2375,7 +2375,6 @@ leave:
mlog(0, "returning %d\n", ret);
return ret;
}
-EXPORT_SYMBOL_GPL(dlm_migrate_lockres);
int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b0c3134f4f7..37be4b2e0d4 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
else
status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
- if (status != DLM_NORMAL)
+ if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
goto leave;
/* By now this has been masked out of cancel requests. */
@@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
spin_lock(&lock->spinlock);
/* if the master told us the lock was already granted,
* let the ast handle all of these actions */
- if (status == DLM_NORMAL &&
- lksb->status == DLM_CANCELGRANT) {
+ if (status == DLM_CANCELGRANT) {
actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
DLM_UNLOCK_REGRANT_LOCK|
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
@@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
vec, veclen, owner, &status);
if (tmpret >= 0) {
// successfully sent and received
- if (status == DLM_CANCELGRANT)
- ret = DLM_NORMAL;
- else if (status == DLM_FORWARD) {
+ if (status == DLM_FORWARD)
mlog(0, "master was in-progress. retry\n");
- ret = DLM_FORWARD;
- } else
- ret = status;
- lksb->status = status;
+ ret = status;
} else {
mlog_errno(tmpret);
if (dlm_is_host_down(tmpret)) {
@@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
/* something bad. this will BUG in ocfs2 */
ret = dlm_err_to_dlm_status(tmpret);
}
- lksb->status = ret;
}
return ret;
@@ -483,6 +476,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
/* lock was found on queue */
lksb = lock->lksb;
+ if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
+ lock->ml.type != LKM_EXMODE)
+ flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
+
/* unlockast only called on originating node */
if (flags & LKM_PUT_LVB) {
lksb->flags |= DLM_LKSB_PUT_LVB;
@@ -507,11 +504,8 @@ not_found:
"cookie=%u:%llu\n",
dlm_get_lock_cookie_node(unlock->cookie),
dlm_get_lock_cookie_seq(unlock->cookie));
- else {
- /* send the lksb->status back to the other node */
- status = lksb->status;
+ else
dlm_lock_put(lock);
- }
leave:
if (res)
@@ -533,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
if (dlm_lock_on_list(&res->blocked, lock)) {
/* cancel this outright */
- lksb->status = DLM_NORMAL;
status = DLM_NORMAL;
*actions = (DLM_UNLOCK_CALL_AST |
DLM_UNLOCK_REMOVE_LOCK);
} else if (dlm_lock_on_list(&res->converting, lock)) {
/* cancel the request, put back on granted */
- lksb->status = DLM_NORMAL;
status = DLM_NORMAL;
*actions = (DLM_UNLOCK_CALL_AST |
DLM_UNLOCK_REMOVE_LOCK |
DLM_UNLOCK_REGRANT_LOCK |
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
} else if (dlm_lock_on_list(&res->granted, lock)) {
- /* too late, already granted. DLM_CANCELGRANT */
- lksb->status = DLM_CANCELGRANT;
- status = DLM_NORMAL;
+ /* too late, already granted. */
+ status = DLM_CANCELGRANT;
*actions = DLM_UNLOCK_CALL_AST;
} else {
mlog(ML_ERROR, "lock to cancel is not on any list!\n");
- lksb->status = DLM_IVLOCKID;
status = DLM_IVLOCKID;
*actions = 0;
}
@@ -569,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
/* unlock request */
if (!dlm_lock_on_list(&res->granted, lock)) {
- lksb->status = DLM_DENIED;
status = DLM_DENIED;
dlm_error(status);
*actions = 0;
} else {
/* unlock granted lock */
- lksb->status = DLM_NORMAL;
status = DLM_NORMAL;
*actions = (DLM_UNLOCK_FREE_LOCK |
DLM_UNLOCK_CALL_AST |
@@ -632,6 +620,8 @@ retry:
spin_lock(&res->spinlock);
is_master = (res->owner == dlm->node_num);
+ if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
+ flags &= ~LKM_VALBLK;
spin_unlock(&res->spinlock);
if (is_master) {
@@ -665,7 +655,7 @@ retry:
}
if (call_ast) {
- mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status);
+ mlog(0, "calling unlockast(%p, %d)\n", data, status);
if (is_master) {
/* it is possible that there is one last bast
* pending. make sure it is flushed, then
@@ -677,9 +667,12 @@ retry:
wait_event(dlm->ast_wq,
dlm_lock_basts_flushed(dlm, lock));
}
- (*unlockast)(data, lksb->status);
+ (*unlockast)(data, status);
}
+ if (status == DLM_CANCELGRANT)
+ status = DLM_NORMAL;
+
if (status == DLM_NORMAL) {
mlog(0, "kicking the thread\n");
dlm_kick_thread(dlm, res);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0d1973ea32b..1f17a4d0828 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
mlog(0, "Allocating %u clusters for a new window.\n",
ocfs2_local_alloc_window_bits(osb));
+
+ /* Instruct the allocation code to try the most recently used
+ * cluster group. We'll re-record the group used this pass
+ * below. */
+ ac->ac_last_group = osb->la_last_gd;
+
/* we used the generic suballoc reserve function, but we set
* everything up nicely, so there's no reason why we can't use
* the more specific cluster api to claim bits. */
@@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
goto bail;
}
+ osb->la_last_gd = ac->ac_last_group;
+
la->la_bm_off = cpu_to_le32(cluster_off);
alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
/* just in case... In the future when we find space ourselves,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index cd4a6f253d1..0462a7f4e21 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -197,7 +197,6 @@ struct ocfs2_super
struct ocfs2_node_map recovery_map;
struct ocfs2_node_map umount_map;
- u32 num_clusters;
u64 root_blkno;
u64 system_dir_blkno;
u64 bitmap_blkno;
@@ -237,6 +236,7 @@ struct ocfs2_super
enum ocfs2_local_alloc_state local_alloc_state;
struct buffer_head *local_alloc_bh;
+ u64 la_last_gd;
/* Next two fields are for local node slot recovery during
* mount. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 195523090c8..9d91e66f51a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
u16 *bit_off, u16 *bits_found);
-static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
- u32 bits_wanted,
- u32 min_bits,
- u16 *bit_off,
- unsigned int *num_bits,
- u64 *bg_blkno);
static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac,
u32 bits_wanted,
@@ -85,11 +79,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
u64 *bg_blkno);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr);
-static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
- struct buffer_head *bg_bh,
- unsigned int bits_wanted,
- u16 *bit_off,
- u16 *bits_found);
static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
@@ -143,6 +132,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
}
+/* somewhat more expensive than our other checks, so use sparingly. */
+static int ocfs2_check_group_descriptor(struct super_block *sb,
+ struct ocfs2_dinode *di,
+ struct ocfs2_group_desc *gd)
+{
+ unsigned int max_bits;
+
+ if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+ OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
+ return -EIO;
+ }
+
+ if (di->i_blkno != gd->bg_parent_dinode) {
+ ocfs2_error(sb, "Group descriptor # %llu has bad parent "
+ "pointer (%llu, expected %llu)",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ return -EIO;
+ }
+
+ max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
+ if (le16_to_cpu(gd->bg_bits) > max_bits) {
+ ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits));
+ return -EIO;
+ }
+
+ if (le16_to_cpu(gd->bg_chain) >=
+ le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+ ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_chain));
+ return -EIO;
+ }
+
+ if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+ ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+ "claims that %u are free",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ le16_to_cpu(gd->bg_free_bits_count));
+ return -EIO;
+ }
+
+ if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+ ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+ "max bitmap bits of %u",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ 8 * le16_to_cpu(gd->bg_size));
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
struct inode *alloc_inode,
struct buffer_head *bg_bh,
@@ -663,6 +710,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
struct buffer_head *bg_bh,
unsigned int bits_wanted,
+ unsigned int total_bits,
u16 *bit_off,
u16 *bits_found)
{
@@ -679,10 +727,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
- while((offset = ocfs2_find_next_zero_bit(bitmap,
- le16_to_cpu(bg->bg_bits),
- start)) != -1) {
- if (offset == le16_to_cpu(bg->bg_bits))
+ while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
+ if (offset == total_bits)
break;
if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -911,14 +957,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
{
int search = -ENOSPC;
int ret;
- struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+ struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
u16 tmp_off, tmp_found;
+ unsigned int max_bits, gd_cluster_off;
BUG_ON(!ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count) {
+ if (gd->bg_free_bits_count) {
+ max_bits = le16_to_cpu(gd->bg_bits);
+
+ /* Tail groups in cluster bitmaps which aren't cpg
+ * aligned are prone to partial extention by a failed
+ * fs resize. If the file system resize never got to
+ * update the dinode cluster count, then we don't want
+ * to trust any clusters past it, regardless of what
+ * the group descriptor says. */
+ gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+ le64_to_cpu(gd->bg_blkno));
+ if ((gd_cluster_off + max_bits) >
+ OCFS2_I(inode)->ip_clusters) {
+ max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+ mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ OCFS2_I(inode)->ip_clusters, max_bits);
+ }
+
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
+ max_bits,
&tmp_off, &tmp_found);
if (ret)
return ret;
@@ -951,17 +1018,109 @@ static int ocfs2_block_group_search(struct inode *inode,
if (bg->bg_free_bits_count)
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
+ le16_to_cpu(bg->bg_bits),
bit_off, bits_found);
return ret;
}
+static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+ struct ocfs2_journal_handle *handle,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain)
+{
+ int ret;
+ u32 tmp_used;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+ struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
+
+ ret = ocfs2_journal_access(handle, inode, di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+ di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
+ le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
+
+ ret = ocfs2_journal_dirty(handle, di_bh);
+ if (ret < 0)
+ mlog_errno(ret);
+
+out:
+ return ret;
+}
+
+static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
+ u32 bits_wanted,
+ u32 min_bits,
+ u16 *bit_off,
+ unsigned int *num_bits,
+ u64 gd_blkno,
+ u16 *bits_left)
+{
+ int ret;
+ u16 found;
+ struct buffer_head *group_bh = NULL;
+ struct ocfs2_group_desc *gd;
+ struct inode *alloc_inode = ac->ac_inode;
+ struct ocfs2_journal_handle *handle = ac->ac_handle;
+
+ ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
+ &group_bh, OCFS2_BH_CACHED, alloc_inode);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ gd = (struct ocfs2_group_desc *) group_bh->b_data;
+ if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+ OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
+ bit_off, &found);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *num_bits = found;
+
+ ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
+ *num_bits,
+ le16_to_cpu(gd->bg_chain));
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
+ *bit_off, *num_bits);
+ if (ret < 0)
+ mlog_errno(ret);
+
+ *bits_left = le16_to_cpu(gd->bg_free_bits_count);
+
+out:
+ brelse(group_bh);
+
+ return ret;
+}
+
static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
u32 bits_wanted,
u32 min_bits,
u16 *bit_off,
unsigned int *num_bits,
- u64 *bg_blkno)
+ u64 *bg_blkno,
+ u16 *bits_left)
{
int status;
u16 chain, tmp_bits;
@@ -988,9 +1147,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
+ status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+ if (status) {
+ mlog_errno(status);
goto bail;
}
@@ -1018,9 +1177,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
+ status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+ if (status) {
+ mlog_errno(status);
goto bail;
}
}
@@ -1099,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
(unsigned long long)fe->i_blkno);
*bg_blkno = le64_to_cpu(bg->bg_blkno);
+ *bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
if (group_bh)
brelse(group_bh);
@@ -1120,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
{
int status;
u16 victim, i;
+ u16 bits_left = 0;
+ u64 hint_blkno = ac->ac_last_group;
struct ocfs2_chain_list *cl;
struct ocfs2_dinode *fe;
@@ -1146,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
goto bail;
}
+ if (hint_blkno) {
+ /* Attempt to short-circuit the usual search mechanism
+ * by jumping straight to the most recently used
+ * allocation group. This helps us mantain some
+ * contiguousness across allocations. */
+ status = ocfs2_search_one_group(ac, bits_wanted, min_bits,
+ bit_off, num_bits,
+ hint_blkno, &bits_left);
+ if (!status) {
+ /* Be careful to update *bg_blkno here as the
+ * caller is expecting it to be filled in, and
+ * ocfs2_search_one_group() won't do that for
+ * us. */
+ *bg_blkno = hint_blkno;
+ goto set_hint;
+ }
+ if (status < 0 && status != -ENOSPC) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
victim = ocfs2_find_victim_chain(cl);
@@ -1153,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
ac->ac_allow_chain_relink = 1;
status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
- num_bits, bg_blkno);
+ num_bits, bg_blkno, &bits_left);
if (!status)
- goto bail;
+ goto set_hint;
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
@@ -1177,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
ac->ac_chain = i;
status = ocfs2_search_chain(ac, bits_wanted, min_bits,
- bit_off, num_bits,
- bg_blkno);
+ bit_off, num_bits, bg_blkno,
+ &bits_left);
if (!status)
break;
if (status < 0 && status != -ENOSPC) {
@@ -1186,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
goto bail;
}
}
-bail:
+set_hint:
+ if (status != -ENOSPC) {
+ /* If the next search of this group is not likely to
+ * yield a suitable extent, then we reset the last
+ * group hint so as to not waste a disk read */
+ if (bits_left < min_bits)
+ ac->ac_last_group = 0;
+ else
+ ac->ac_last_group = *bg_blkno;
+ }
+
+bail:
mlog_exit(status);
return status;
}
@@ -1341,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
{
int status;
unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
- u64 bg_blkno;
+ u64 bg_blkno = 0;
u16 bg_bit_off;
mlog_entry_void();
@@ -1494,9 +1689,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
}
group = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(group)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group);
- status = -EIO;
+ status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
+ if (status) {
+ mlog_errno(status);
goto bail;
}
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a76c82a7cea..c787838d105 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,6 +49,8 @@ struct ocfs2_alloc_context {
u16 ac_chain;
int ac_allow_chain_relink;
group_search_t *ac_group_search;
+
+ u64 ac_last_group;
};
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 382706a67ff..d17e33e66a1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
+ /* We don't have a cluster lock on the bitmap here because
+ * we're only interested in static information and the extra
+ * complexity at mount time isn't worht it. Don't pass the
+ * inode in to the read function though as we don't want it to
+ * be put in the cache. */
status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
- inode);
+ NULL);
iput(inode);
if (status < 0) {
mlog_errno(status);
@@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
di = (struct ocfs2_dinode *) bitmap_bh->b_data;
osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
- osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
brelse(bitmap_bh);
mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
(unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4df822c881b..7de172efa08 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
if (!ei)
return NULL;
+
+ ei->i_unique = 0;
+ ei->i_lenExtents = 0;
+ ei->i_next_alloc_block = 0;
+ ei->i_next_alloc_goal = 0;
+ ei->i_strat4096 = 0;
+
return &ei->vfs_inode;
}
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index eef6763f3a6..d2bbcd882a6 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist(
&agbp)))
return error;
if (!pag->pagf_init) {
+ ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+ ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL;
return 0;
}
} else
agbp = NULL;
- /* If this is a metadata preferred pag and we are user data
+ /*
+ * If this is a metadata preferred pag and we are user data
* then try somewhere else if we are not being asked to
* try harder at this point
*/
- if (pag->pagf_metadata && args->userdata && flags) {
+ if (pag->pagf_metadata && args->userdata &&
+ (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+ ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL;
return 0;
}
- need = XFS_MIN_FREELIST_PAG(pag, mp);
- delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
- /*
- * If it looks like there isn't a long enough extent, or enough
- * total blocks, reject it.
- */
- longest = (pag->pagf_longest > delta) ?
- (pag->pagf_longest - delta) :
- (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
- if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
- (!(flags & XFS_ALLOC_FLAG_FREEING) &&
- (int)(pag->pagf_freeblks + pag->pagf_flcount -
- need - args->total) <
- (int)args->minleft)) {
- if (agbp)
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
+ if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+ need = XFS_MIN_FREELIST_PAG(pag, mp);
+ delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+ /*
+ * If it looks like there isn't a long enough extent, or enough
+ * total blocks, reject it.
+ */
+ longest = (pag->pagf_longest > delta) ?
+ (pag->pagf_longest - delta) :
+ (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+ if ((args->minlen + args->alignment + args->minalignslop - 1) >
+ longest ||
+ ((int)(pag->pagf_freeblks + pag->pagf_flcount -
+ need - args->total) < (int)args->minleft)) {
+ if (agbp)
+ xfs_trans_brelse(tp, agbp);
+ args->agbp = NULL;
+ return 0;
+ }
}
+
/*
* Get the a.g. freespace buffer.
* Can fail if we're not blocking on locks, and it's held.
@@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist(
&agbp)))
return error;
if (agbp == NULL) {
+ ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+ ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
args->agbp = NULL;
return 0;
}
@@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist(
*/
agf = XFS_BUF_TO_AGF(agbp);
need = XFS_MIN_FREELIST(agf, mp);
- delta = need > be32_to_cpu(agf->agf_flcount) ?
- (need - be32_to_cpu(agf->agf_flcount)) : 0;
/*
* If there isn't enough total or single-extent, reject it.
*/
- longest = be32_to_cpu(agf->agf_longest);
- longest = (longest > delta) ? (longest - delta) :
- (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
- if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
- (!(flags & XFS_ALLOC_FLAG_FREEING) &&
- (int)(be32_to_cpu(agf->agf_freeblks) +
- be32_to_cpu(agf->agf_flcount) - need - args->total) <
- (int)args->minleft)) {
- xfs_trans_brelse(tp, agbp);
- args->agbp = NULL;
- return 0;
+ if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+ delta = need > be32_to_cpu(agf->agf_flcount) ?
+ (need - be32_to_cpu(agf->agf_flcount)) : 0;
+ longest = be32_to_cpu(agf->agf_longest);
+ longest = (longest > delta) ? (longest - delta) :
+ (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
+ if ((args->minlen + args->alignment + args->minalignslop - 1) >
+ longest ||
+ ((int)(be32_to_cpu(agf->agf_freeblks) +
+ be32_to_cpu(agf->agf_flcount) - need - args->total) <
+ (int)args->minleft)) {
+ xfs_trans_brelse(tp, agbp);
+ args->agbp = NULL;
+ return 0;
+ }
}
/*
* Make the freelist shorter if it's too long.
@@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist(
* on a completely full ag.
*/
if (targs.agbno == NULLAGBLOCK) {
- if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
- xfs_trans_brelse(tp, agflbp);
- args->agbp = NULL;
- return 0;
- }
- break;
+ if (flags & XFS_ALLOC_FLAG_FREEING)
+ break;
+ xfs_trans_brelse(tp, agflbp);
+ args->agbp = NULL;
+ return 0;
}
/*
* Put each allocated block on the list.
@@ -2442,31 +2452,26 @@ xfs_free_extent(
xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len) /* length of extent */
{
-#ifdef DEBUG
- xfs_agf_t *agf; /* a.g. freespace header */
-#endif
- xfs_alloc_arg_t args; /* allocation argument structure */
+ xfs_alloc_arg_t args;
int error;
ASSERT(len != 0);
+ memset(&args, 0, sizeof(xfs_alloc_arg_t));
args.tp = tp;
args.mp = tp->t_mountp;
args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
ASSERT(args.agno < args.mp->m_sb.sb_agcount);
args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
- args.alignment = 1;
- args.minlen = args.minleft = args.minalignslop = 0;
down_read(&args.mp->m_peraglock);
args.pag = &args.mp->m_perag[args.agno];
if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
goto error0;
#ifdef DEBUG
ASSERT(args.agbp != NULL);
- agf = XFS_BUF_TO_AGF(args.agbp);
- ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length));
+ ASSERT((args.agbno + len) <=
+ be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
#endif
- error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
- len, 0);
+ error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
error0:
up_read(&args.mp->m_peraglock);
return error;