about summary refs log tree commit diff
path: root/fs/jfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jfs')
-rw-r--r-- fs/jfs/inode.c 35
-rw-r--r-- fs/jfs/jfs_dmap.c 12
-rw-r--r-- fs/jfs/jfs_dtree.c 6
-rw-r--r-- fs/jfs/jfs_imap.c 84
-rw-r--r-- fs/jfs/jfs_incore.h 1
-rw-r--r-- fs/jfs/jfs_logmgr.c 150
-rw-r--r-- fs/jfs/jfs_logmgr.h 9
-rw-r--r-- fs/jfs/jfs_metapage.c 908
-rw-r--r-- fs/jfs/jfs_metapage.h 80
-rw-r--r-- fs/jfs/jfs_mount.c 5
-rw-r--r-- fs/jfs/jfs_txnmgr.c 166
-rw-r--r-- fs/jfs/jfs_umount.c 16
-rw-r--r-- fs/jfs/jfs_xtree.c 63
-rw-r--r-- fs/jfs/resize.c 3
-rw-r--r-- fs/jfs/super.c 37
15 files changed, 994 insertions, 581 deletions
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 7bc906677b0..24a689179af 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
{
s64 lblock64 = lblock;
int rc = 0;
- int take_locks;
xad_t xad;
s64 xaddr;
int xflag;
- s32 xlen;
-
- /*
- * If this is a special inode (imap, dmap)
- * the lock should already be taken
- */
- take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I);
+ s32 xlen = max_blocks;
/*
* Take appropriate lock on inode
*/
- if (take_locks) {
- if (create)
- IWRITE_LOCK(ip);
- else
- IREAD_LOCK(ip);
- }
+ if (create)
+ IWRITE_LOCK(ip);
+ else
+ IREAD_LOCK(ip);
if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
- (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)
- == 0) && xlen) {
+ (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+ xaddr) {
if (xflag & XAD_NOTRECORDED) {
if (!create)
/*
@@ -238,7 +229,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
#ifdef _JFS_4K
if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
goto unlock;
- rc = extAlloc(ip, max_blocks, lblock64, &xad, FALSE);
+ rc = extAlloc(ip, xlen, lblock64, &xad, FALSE);
if (rc)
goto unlock;
@@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
/*
* Release lock on inode
*/
- if (take_locks) {
- if (create)
- IWRITE_UNLOCK(ip);
- else
- IREAD_UNLOCK(ip);
- }
+ if (create)
+ IWRITE_UNLOCK(ip);
+ else
+ IREAD_UNLOCK(ip);
return rc;
}
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d86e467c6e4..69007fd546e 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap,
struct metapage *mp;
struct jfs_log *log;
int lsn, difft, diffp;
+ unsigned long flags;
/* the blocks better be within the mapsize. */
if (blkno + nblocks > bmp->db_mapsize) {
@@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap,
0);
if (mp == NULL)
return -EIO;
+ metapage_wait_for_io(mp);
}
dp = (struct dmap *) mp->data;
@@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap,
if (mp->lsn != 0) {
/* inherit older/smaller lsn */
logdiff(diffp, mp->lsn, log);
+ LOGSYNC_LOCK(log, flags);
if (difft < diffp) {
mp->lsn = lsn;
/* move bp after tblock in logsync list */
- LOGSYNC_LOCK(log);
list_move(&mp->synclist, &tblk->synclist);
- LOGSYNC_UNLOCK(log);
}
/* inherit younger/larger clsn */
- LOGSYNC_LOCK(log);
logdiff(difft, tblk->clsn, log);
logdiff(diffp, mp->clsn, log);
if (difft > diffp)
mp->clsn = tblk->clsn;
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
} else {
mp->log = log;
mp->lsn = lsn;
/* insert bp after tblock in logsync list */
- LOGSYNC_LOCK(log);
+ LOGSYNC_LOCK(log, flags);
log->count++;
list_add(&mp->synclist, &tblk->synclist);
mp->clsn = tblk->clsn;
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
}
}
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index e357890adfb..ac41f72d6d5 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -212,7 +212,7 @@ static struct metapage *read_index_page(struct inode *inode, s64 blkno)
s32 xlen;
rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
- if (rc || (xlen == 0))
+ if (rc || (xaddr == 0))
return NULL;
return read_metapage(inode, xaddr, PSIZE, 1);
@@ -231,7 +231,7 @@ static struct metapage *get_index_page(struct inode *inode, s64 blkno)
s32 xlen;
rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
- if (rc || (xlen == 0))
+ if (rc || (xaddr == 0))
return NULL;
return get_metapage(inode, xaddr, PSIZE, 1);
@@ -3181,7 +3181,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
d = (struct ldtentry *) & p->slot[stbl[i]];
if (((long) jfs_dirent + d->namlen + 1) >
- (dirent_buf + PSIZE)) {
+ (dirent_buf + PAGE_SIZE)) {
/* DBCS codepages could overrun dirent_buf */
index = i;
overflow = 1;
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 78383130162..7acff2ce3c8 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
}
- ip->i_mapping->a_ops = &jfs_aops;
+ ip->i_mapping->a_ops = &jfs_metapage_aops;
mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
/* Allocations to metadata inodes should not affect quotas */
@@ -2573,9 +2573,18 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
goto out;
}
- /* assign a buffer for the page */
- mp = get_metapage(ipimap, xaddr, PSIZE, 1);
- if (!mp) {
+ /*
+ * start transaction of update of the inode map
+ * addressing structure pointing to the new iag page;
+ */
+ tid = txBegin(sb, COMMIT_FORCE);
+ down(&JFS_IP(ipimap)->commit_sem);
+
+ /* update the inode map addressing structure to point to it */
+ if ((rc =
+ xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
+ txEnd(tid);
+ up(&JFS_IP(ipimap)->commit_sem);
/* Free the blocks allocated for the iag since it was
* not successfully added to the inode map
*/
@@ -2584,6 +2593,29 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
/* release the inode map lock */
IWRITE_UNLOCK(ipimap);
+ goto out;
+ }
+
+ /* update the inode map's inode to reflect the extension */
+ ipimap->i_size += PSIZE;
+ inode_add_bytes(ipimap, PSIZE);
+
+ /* assign a buffer for the page */
+ mp = get_metapage(ipimap, blkno, PSIZE, 0);
+ if (!mp) {
+ /*
+ * This is very unlikely since we just created the
+ * extent, but let's try to handle it correctly
+ */
+ xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
+ COMMIT_PWMAP);
+
+ txAbort(tid, 0);
+ txEnd(tid);
+
+ /* release the inode map lock */
+ IWRITE_UNLOCK(ipimap);
+
rc = -EIO;
goto out;
}
@@ -2605,41 +2637,11 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
iagp->inosmap[i] = cpu_to_le32(ONES);
/*
- * Invalidate the page after writing and syncing it.
- * After it's initialized, we access it in a different
- * address space
+ * Write and sync the metapage
*/
- set_bit(META_discard, &mp->flag);
flush_metapage(mp);
/*
- * start tyransaction of update of the inode map
- * addressing structure pointing to the new iag page;
- */
- tid = txBegin(sb, COMMIT_FORCE);
- down(&JFS_IP(ipimap)->commit_sem);
-
- /* update the inode map addressing structure to point to it */
- if ((rc =
- xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
- txEnd(tid);
- up(&JFS_IP(ipimap)->commit_sem);
- /* Free the blocks allocated for the iag since it was
- * not successfully added to the inode map
- */
- dbFree(ipimap, xaddr, (s64) xlen);
-
- /* release the inode map lock */
- IWRITE_UNLOCK(ipimap);
-
- goto out;
- }
-
- /* update the inode map's inode to reflect the extension */
- ipimap->i_size += PSIZE;
- inode_add_bytes(ipimap, PSIZE);
-
- /*
* txCommit(COMMIT_FORCE) will synchronously write address
* index pages and inode after commit in careful update order
* of address index pages (right to left, bottom up);
@@ -2789,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap,
u32 mask;
struct jfs_log *log;
int lsn, difft, diffp;
+ unsigned long flags;
imap = JFS_IP(ipimap)->i_imap;
/* get the iag number containing the inode */
@@ -2805,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap,
IREAD_UNLOCK(ipimap);
if (rc)
return (rc);
+ metapage_wait_for_io(mp);
iagp = (struct iag *) mp->data;
/* get the inode number and extent number of the inode within
* the iag and the inode number within the extent.
@@ -2868,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap,
/* inherit older/smaller lsn */
logdiff(difft, lsn, log);
logdiff(diffp, mp->lsn, log);
+ LOGSYNC_LOCK(log, flags);
if (difft < diffp) {
mp->lsn = lsn;
/* move mp after tblock in logsync list */
- LOGSYNC_LOCK(log);
list_move(&mp->synclist, &tblk->synclist);
- LOGSYNC_UNLOCK(log);
}
/* inherit younger/larger clsn */
- LOGSYNC_LOCK(log);
assert(mp->clsn);
logdiff(difft, tblk->clsn, log);
logdiff(diffp, mp->clsn, log);
if (difft > diffp)
mp->clsn = tblk->clsn;
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
} else {
mp->log = log;
mp->lsn = lsn;
/* insert mp after tblock in logsync list */
- LOGSYNC_LOCK(log);
+ LOGSYNC_LOCK(log, flags);
log->count++;
list_add(&mp->synclist, &tblk->synclist);
mp->clsn = tblk->clsn;
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
}
write_metapage(mp);
return (0);
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index ebd77c1bed6..c0fd7b3eadc 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -165,6 +165,7 @@ struct jfs_sb_info {
/* Formerly in ipbmap */
struct bmap *bmap; /* incore bmap descriptor */
struct nls_table *nls_tab; /* current codepage */
+ struct inode *direct_inode; /* metadata inode */
uint state; /* mount/recovery state */
unsigned long flag; /* mount time flags */
uint p_state; /* state prior to going no integrity */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index b6a6869ebb4..dfa1200daa6 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
int lsn;
int diffp, difft;
struct metapage *mp = NULL;
+ unsigned long flags;
jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
log, tblk, lrd, tlck);
@@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
*/
lsn = log->lsn;
- LOGSYNC_LOCK(log);
+ LOGSYNC_LOCK(log, flags);
/*
* initialize page lsn if first log write of the page
@@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
}
}
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
/*
* write the log record
@@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
return lsn;
}
-
/*
* NAME: lmWriteRecord()
*
@@ -927,9 +927,8 @@ static void lmPostGC(struct lbuf * bp)
* calculate new value of i_nextsync which determines when
* this code is called again.
*
- * this is called only from lmLog().
- *
- * PARAMETER: ip - pointer to logs inode.
+ * PARAMETERS: log - log structure
+ * nosyncwait - 1 if called asynchronously
*
* RETURN: 0
*
@@ -945,6 +944,15 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
struct lrd lrd;
int lsn;
struct logsyncblk *lp;
+ struct jfs_sb_info *sbi;
+ unsigned long flags;
+
+ /* push dirty metapages out to disk */
+ list_for_each_entry(sbi, &log->sb_list, log_list) {
+ filemap_flush(sbi->ipbmap->i_mapping);
+ filemap_flush(sbi->ipimap->i_mapping);
+ filemap_flush(sbi->direct_inode->i_mapping);
+ }
/*
* forward syncpt
@@ -954,10 +962,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
*/
if (log->sync == log->syncpt) {
- LOGSYNC_LOCK(log);
- /* ToDo: push dirty metapages out to disk */
-// bmLogSync(log);
-
+ LOGSYNC_LOCK(log, flags);
if (list_empty(&log->synclist))
log->sync = log->lsn;
else {
@@ -965,7 +970,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
struct logsyncblk, synclist);
log->sync = lp->lsn;
}
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
}
@@ -974,27 +979,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
* reset syncpt = sync
*/
if (log->sync != log->syncpt) {
- struct jfs_sb_info *sbi;
-
- /*
- * We need to make sure all of the "written" metapages
- * actually make it to disk
- */
- list_for_each_entry(sbi, &log->sb_list, log_list) {
- if (sbi->flag & JFS_NOINTEGRITY)
- continue;
- filemap_fdatawrite(sbi->ipbmap->i_mapping);
- filemap_fdatawrite(sbi->ipimap->i_mapping);
- filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
- }
- list_for_each_entry(sbi, &log->sb_list, log_list) {
- if (sbi->flag & JFS_NOINTEGRITY)
- continue;
- filemap_fdatawait(sbi->ipbmap->i_mapping);
- filemap_fdatawait(sbi->ipimap->i_mapping);
- filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
- }
-
lrd.logtid = 0;
lrd.backchain = 0;
lrd.type = cpu_to_le16(LOG_SYNCPT);
@@ -1066,6 +1050,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
return lsn;
}
+/*
+ * NAME: jfs_syncpt
+ *
+ * FUNCTION: write log SYNCPT record for specified log
+ *
+ * PARAMETERS: log - log structure
+ */
+void jfs_syncpt(struct jfs_log *log)
+{ LOG_LOCK(log);
+ lmLogSync(log, 1);
+ LOG_UNLOCK(log);
+}
/*
* NAME: lmLogOpen()
@@ -1547,6 +1543,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
{
int i;
struct tblock *target = NULL;
+ struct jfs_sb_info *sbi;
/* jfs_write_inode may call us during read-only mount */
if (!log)
@@ -1608,12 +1605,18 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
if (wait < 2)
return;
+ list_for_each_entry(sbi, &log->sb_list, log_list) {
+ filemap_fdatawrite(sbi->ipbmap->i_mapping);
+ filemap_fdatawrite(sbi->ipimap->i_mapping);
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
+ }
+
/*
* If there was recent activity, we may need to wait
* for the lazycommit thread to catch up
*/
if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
- for (i = 0; i < 800; i++) { /* Too much? */
+ for (i = 0; i < 200; i++) { /* Too much? */
msleep(250);
if (list_empty(&log->cqueue) &&
list_empty(&log->synclist))
@@ -1621,7 +1624,24 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
}
}
assert(list_empty(&log->cqueue));
- assert(list_empty(&log->synclist));
+ if (!list_empty(&log->synclist)) {
+ struct logsyncblk *lp;
+
+ list_for_each_entry(lp, &log->synclist, synclist) {
+ if (lp->xflag & COMMIT_PAGE) {
+ struct metapage *mp = (struct metapage *)lp;
+ dump_mem("orphan metapage", lp,
+ sizeof(struct metapage));
+ dump_mem("page", mp->page, sizeof(struct page));
+ }
+ else
+ dump_mem("orphan tblock", lp,
+ sizeof(struct tblock));
+ }
+// current->state = TASK_INTERRUPTIBLE;
+// schedule();
+ }
+ //assert(list_empty(&log->synclist));
clear_bit(log_FLUSH, &log->flag);
}
@@ -1669,6 +1689,7 @@ int lmLogShutdown(struct jfs_log * log)
lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
lbmIOWait(log->bp, lbmFREE);
+ log->bp = NULL;
/*
* synchronous update log superblock
@@ -1819,20 +1840,34 @@ static int lbmLogInit(struct jfs_log * log)
log->lbuf_free = NULL;
- for (i = 0; i < LOGPAGES; i++) {
- lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
- if (lbuf == 0)
- goto error;
- lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL);
- if (lbuf->l_ldata == 0) {
- kfree(lbuf);
+ for (i = 0; i < LOGPAGES;) {
+ char *buffer;
+ uint offset;
+ struct page *page;
+
+ buffer = (char *) get_zeroed_page(GFP_KERNEL);
+ if (buffer == NULL)
goto error;
+ page = virt_to_page(buffer);
+ for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
+ lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
+ if (lbuf == NULL) {
+ if (offset == 0)
+ free_page((unsigned long) buffer);
+ goto error;
+ }
+ if (offset) /* we already have one reference */
+ get_page(page);
+ lbuf->l_offset = offset;
+ lbuf->l_ldata = buffer + offset;
+ lbuf->l_page = page;
+ lbuf->l_log = log;
+ init_waitqueue_head(&lbuf->l_ioevent);
+
+ lbuf->l_freelist = log->lbuf_free;
+ log->lbuf_free = lbuf;
+ i++;
}
- lbuf->l_log = log;
- init_waitqueue_head(&lbuf->l_ioevent);
-
- lbuf->l_freelist = log->lbuf_free;
- log->lbuf_free = lbuf;
}
return (0);
@@ -1857,12 +1892,10 @@ static void lbmLogShutdown(struct jfs_log * log)
lbuf = log->lbuf_free;
while (lbuf) {
struct lbuf *next = lbuf->l_freelist;
- free_page((unsigned long) lbuf->l_ldata);
+ __free_page(lbuf->l_page);
kfree(lbuf);
lbuf = next;
}
-
- log->bp = NULL;
}
@@ -1974,9 +2007,9 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
bio->bi_bdev = log->bdev;
- bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
+ bio->bi_io_vec[0].bv_page = bp->l_page;
bio->bi_io_vec[0].bv_len = LOGPSIZE;
- bio->bi_io_vec[0].bv_offset = 0;
+ bio->bi_io_vec[0].bv_offset = bp->l_offset;
bio->bi_vcnt = 1;
bio->bi_idx = 0;
@@ -2115,9 +2148,9 @@ static void lbmStartIO(struct lbuf * bp)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
bio->bi_bdev = log->bdev;
- bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
+ bio->bi_io_vec[0].bv_page = bp->l_page;
bio->bi_io_vec[0].bv_len = LOGPSIZE;
- bio->bi_io_vec[0].bv_offset = 0;
+ bio->bi_io_vec[0].bv_offset = bp->l_offset;
bio->bi_vcnt = 1;
bio->bi_idx = 0;
@@ -2127,16 +2160,13 @@ static void lbmStartIO(struct lbuf * bp)
bio->bi_private = bp;
/* check if journaling to disk has been disabled */
- if (!log->no_integrity) {
+ if (log->no_integrity) {
+ bio->bi_size = 0;
+ lbmIODone(bio, 0, 0);
+ } else {
submit_bio(WRITE_SYNC, bio);
INCREMENT(lmStat.submitted);
}
- else {
- bio->bi_size = 0;
- lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0
- * 3rd argument appears to not be used => 0
- */
- }
}
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 141ad74010c..51291fbc420 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -463,9 +463,10 @@ struct lbuf {
s64 l_blkno; /* 8: log page block number */
caddr_t l_ldata; /* 4: data page */
+ struct page *l_page; /* The page itself */
+ uint l_offset; /* Offset of l_ldata within the page */
wait_queue_head_t l_ioevent; /* 4: i/o done event */
- struct page *l_page; /* The page itself */
};
/* Reuse l_freelist for redrive list */
@@ -489,8 +490,9 @@ struct logsyncblk {
*/
#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
-#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock)
-#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock)
+#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
+#define LOGSYNC_UNLOCK(log, flags) \
+ spin_unlock_irqrestore(&(log)->synclock, flags)
/* compute the difference in bytes of lsn from sync point */
#define logdiff(diff, lsn, log)\
@@ -506,5 +508,6 @@ extern int lmLogShutdown(struct jfs_log * log);
extern int lmLogInit(struct jfs_log * log);
extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
extern void jfs_flush_journal(struct jfs_log * log, int wait);
+extern void jfs_syncpt(struct jfs_log *log);
#endif /* _H_JFS_LOGMGR */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 4c0a3ac75c0..41bf078dce0 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) International Business Machines Corp., 2000-2003
+ * Copyright (C) International Business Machines Corp., 2000-2005
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
@@ -18,10 +18,11 @@
*/
#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
-#include <linux/delay.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
@@ -29,8 +30,6 @@
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
-static DEFINE_SPINLOCK(meta_lock);
-
#ifdef CONFIG_JFS_STATISTICS
static struct {
uint pagealloc; /* # of page allocations */
@@ -39,22 +38,8 @@ static struct {
} mpStat;
#endif
-
-#define HASH_BITS 10 /* This makes hash_table 1 4K page */
-#define HASH_SIZE (1 << HASH_BITS)
-static struct metapage **hash_table = NULL;
-static unsigned long hash_order;
-
-
-static inline int metapage_locked(struct metapage *mp)
-{
- return test_bit(META_locked, &mp->flag);
-}
-
-static inline int trylock_metapage(struct metapage *mp)
-{
- return test_and_set_bit(META_locked, &mp->flag);
-}
+#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
+#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
static inline void unlock_metapage(struct metapage *mp)
{
@@ -62,26 +47,26 @@ static inline void unlock_metapage(struct metapage *mp)
wake_up(&mp->wait);
}
-static void __lock_metapage(struct metapage *mp)
+static inline void __lock_metapage(struct metapage *mp)
{
DECLARE_WAITQUEUE(wait, current);
-
INCREMENT(mpStat.lockwait);
-
add_wait_queue_exclusive(&mp->wait, &wait);
do {
set_current_state(TASK_UNINTERRUPTIBLE);
if (metapage_locked(mp)) {
- spin_unlock(&meta_lock);
+ unlock_page(mp->page);
schedule();
- spin_lock(&meta_lock);
+ lock_page(mp->page);
}
} while (trylock_metapage(mp));
__set_current_state(TASK_RUNNING);
remove_wait_queue(&mp->wait, &wait);
}
-/* needs meta_lock */
+/*
+ * Must have mp->page locked
+ */
static inline void lock_metapage(struct metapage *mp)
{
if (trylock_metapage(mp))
@@ -92,6 +77,110 @@ static inline void lock_metapage(struct metapage *mp)
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;
+#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
+
+#if MPS_PER_PAGE > 1
+
+struct meta_anchor {
+ int mp_count;
+ atomic_t io_count;
+ struct metapage *mp[MPS_PER_PAGE];
+};
+#define mp_anchor(page) ((struct meta_anchor *)page->private)
+
+static inline struct metapage *page_to_mp(struct page *page, uint offset)
+{
+ if (!PagePrivate(page))
+ return NULL;
+ return mp_anchor(page)->mp[offset >> L2PSIZE];
+}
+
+static inline int insert_metapage(struct page *page, struct metapage *mp)
+{
+ struct meta_anchor *a;
+ int index;
+ int l2mp_blocks; /* log2 blocks per metapage */
+
+ if (PagePrivate(page))
+ a = mp_anchor(page);
+ else {
+ a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS);
+ if (!a)
+ return -ENOMEM;
+ memset(a, 0, sizeof(struct meta_anchor));
+ page->private = (unsigned long)a;
+ SetPagePrivate(page);
+ kmap(page);
+ }
+
+ if (mp) {
+ l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+ index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
+ a->mp_count++;
+ a->mp[index] = mp;
+ }
+
+ return 0;
+}
+
+static inline void remove_metapage(struct page *page, struct metapage *mp)
+{
+ struct meta_anchor *a = mp_anchor(page);
+ int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+ int index;
+
+ index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
+
+ BUG_ON(a->mp[index] != mp);
+
+ a->mp[index] = NULL;
+ if (--a->mp_count == 0) {
+ kfree(a);
+ page->private = 0;
+ ClearPagePrivate(page);
+ kunmap(page);
+ }
+}
+
+static inline void inc_io(struct page *page)
+{
+ atomic_inc(&mp_anchor(page)->io_count);
+}
+
+static inline void dec_io(struct page *page, void (*handler) (struct page *))
+{
+ if (atomic_dec_and_test(&mp_anchor(page)->io_count))
+ handler(page);
+}
+
+#else
+static inline struct metapage *page_to_mp(struct page *page, uint offset)
+{
+ return PagePrivate(page) ? (struct metapage *)page->private : NULL;
+}
+
+static inline int insert_metapage(struct page *page, struct metapage *mp)
+{
+ if (mp) {
+ page->private = (unsigned long)mp;
+ SetPagePrivate(page);
+ kmap(page);
+ }
+ return 0;
+}
+
+static inline void remove_metapage(struct page *page, struct metapage *mp)
+{
+ page->private = 0;
+ ClearPagePrivate(page);
+ kunmap(page);
+}
+
+#define inc_io(page) do {} while(0)
+#define dec_io(page, handler) handler(page)
+
+#endif
+
static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
struct metapage *mp = (struct metapage *)foo;
@@ -139,16 +228,6 @@ int __init metapage_init(void)
kmem_cache_destroy(metapage_cache);
return -ENOMEM;
}
- /*
- * Now the hash list
- */
- for (hash_order = 0;
- ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
- hash_order++);
- hash_table =
- (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
- assert(hash_table);
- memset(hash_table, 0, PAGE_SIZE << hash_order);
return 0;
}
@@ -159,73 +238,388 @@ void metapage_exit(void)
kmem_cache_destroy(metapage_cache);
}
+static inline void drop_metapage(struct page *page, struct metapage *mp)
+{
+ if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
+ test_bit(META_io, &mp->flag))
+ return;
+ remove_metapage(page, mp);
+ INCREMENT(mpStat.pagefree);
+ free_metapage(mp);
+}
+
/*
- * Basically same hash as in pagemap.h, but using our hash table
+ * Metapage address space operations
*/
-static struct metapage **meta_hash(struct address_space *mapping,
- unsigned long index)
+
+static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
+ unsigned int *len)
{
-#define i (((unsigned long)mapping)/ \
- (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
-#define s(x) ((x) + ((x) >> HASH_BITS))
- return hash_table + (s(i + index) & (HASH_SIZE - 1));
-#undef i
-#undef s
+ int rc = 0;
+ int xflag;
+ s64 xaddr;
+ sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
+ inode->i_blkbits;
+
+ if (lblock >= file_blocks)
+ return 0;
+ if (lblock + *len > file_blocks)
+ *len = file_blocks - lblock;
+
+ if (inode->i_ino) {
+ rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
+ if ((rc == 0) && *len)
+ lblock = (sector_t)xaddr;
+ else
+ lblock = 0;
+ } /* else no mapping */
+
+ return lblock;
}
-static struct metapage *search_hash(struct metapage ** hash_ptr,
- struct address_space *mapping,
- unsigned long index)
+static void last_read_complete(struct page *page)
{
- struct metapage *ptr;
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+}
+
+static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
+ int err)
+{
+ struct page *page = bio->bi_private;
+
+ if (bio->bi_size)
+ return 1;
- for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
- if ((ptr->mapping == mapping) && (ptr->index == index))
- return ptr;
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ printk(KERN_ERR "metapage_read_end_io: I/O error\n");
+ SetPageError(page);
}
- return NULL;
+ dec_io(page, last_read_complete);
+ bio_put(bio);
+
+ return 0;
}
-static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
+static void remove_from_logsync(struct metapage *mp)
{
- if (*hash_ptr)
- (*hash_ptr)->hash_prev = mp;
+ struct jfs_log *log = mp->log;
+ unsigned long flags;
+/*
+ * This can race. Recheck that log hasn't been set to null, and after
+ * acquiring logsync lock, recheck lsn
+ */
+ if (!log)
+ return;
+
+ LOGSYNC_LOCK(log, flags);
+ if (mp->lsn) {
+ mp->log = NULL;
+ mp->lsn = 0;
+ mp->clsn = 0;
+ log->count--;
+ list_del(&mp->synclist);
+ }
+ LOGSYNC_UNLOCK(log, flags);
+}
- mp->hash_prev = NULL;
- mp->hash_next = *hash_ptr;
- *hash_ptr = mp;
+static void last_write_complete(struct page *page)
+{
+ struct metapage *mp;
+ unsigned int offset;
+
+ for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ mp = page_to_mp(page, offset);
+ if (mp && test_bit(META_io, &mp->flag)) {
+ if (mp->lsn)
+ remove_from_logsync(mp);
+ clear_bit(META_io, &mp->flag);
+ }
+ /*
+ * I'd like to call drop_metapage here, but I don't think it's
+ * safe unless I have the page locked
+ */
+ }
+ end_page_writeback(page);
}
-static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
+static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
+ int err)
{
- if (mp->hash_prev)
- mp->hash_prev->hash_next = mp->hash_next;
- else {
- assert(*hash_ptr == mp);
- *hash_ptr = mp->hash_next;
+ struct page *page = bio->bi_private;
+
+ BUG_ON(!PagePrivate(page));
+
+ if (bio->bi_size)
+ return 1;
+
+ if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+ printk(KERN_ERR "metapage_write_end_io: I/O error\n");
+ SetPageError(page);
+ }
+ dec_io(page, last_write_complete);
+ bio_put(bio);
+ return 0;
+}
+
+static int metapage_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct bio *bio = NULL;
+ unsigned int block_offset; /* block offset of mp within page */
+ struct inode *inode = page->mapping->host;
+ unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
+ unsigned int len;
+ unsigned int xlen;
+ struct metapage *mp;
+ int redirty = 0;
+ sector_t lblock;
+ sector_t pblock;
+ sector_t next_block = 0;
+ sector_t page_start;
+ unsigned long bio_bytes = 0;
+ unsigned long bio_offset = 0;
+ unsigned int offset;
+
+ page_start = (sector_t)page->index <<
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+
+ for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ mp = page_to_mp(page, offset);
+
+ if (!mp || !test_bit(META_dirty, &mp->flag))
+ continue;
+
+ if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
+ redirty = 1;
+ continue;
+ }
+
+ clear_bit(META_dirty, &mp->flag);
+ block_offset = offset >> inode->i_blkbits;
+ lblock = page_start + block_offset;
+ if (bio) {
+ if (xlen && lblock == next_block) {
+ /* Contiguous, in memory & on disk */
+ len = min(xlen, blocks_per_mp);
+ xlen -= len;
+ bio_bytes += len << inode->i_blkbits;
+ set_bit(META_io, &mp->flag);
+ continue;
+ }
+ /* Not contiguous */
+ if (bio_add_page(bio, page, bio_bytes, bio_offset) <
+ bio_bytes)
+ goto add_failed;
+ /*
+ * Increment counter before submitting i/o to keep
+ * count from hitting zero before we're through
+ */
+ inc_io(page);
+ if (!bio->bi_size)
+ goto dump_bio;
+ submit_bio(WRITE, bio);
+ bio = NULL;
+ } else {
+ set_page_writeback(page);
+ inc_io(page);
+ }
+ xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
+ pblock = metapage_get_blocks(inode, lblock, &xlen);
+ if (!pblock) {
+ /* Need better error handling */
+ printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
+ dec_io(page, last_write_complete);
+ continue;
+ }
+ set_bit(META_io, &mp->flag);
+ len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage);
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ bio->bi_bdev = inode->i_sb->s_bdev;
+ bio->bi_sector = pblock << (inode->i_blkbits - 9);
+ bio->bi_end_io = metapage_write_end_io;
+ bio->bi_private = page;
+
+ /* Don't call bio_add_page yet, we may add to this vec */
+ bio_offset = offset;
+ bio_bytes = len << inode->i_blkbits;
+
+ xlen -= len;
+ next_block = lblock + len;
+ }
+ if (bio) {
+ if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
+ goto add_failed;
+ if (!bio->bi_size)
+ goto dump_bio;
+
+ submit_bio(WRITE, bio);
+ }
+ if (redirty)
+ redirty_page_for_writepage(wbc, page);
+
+ unlock_page(page);
+
+ return 0;
+add_failed:
+ /* We should never reach here, since we're only adding one vec */
+ printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
+ goto skip;
+dump_bio:
+ dump_mem("bio", bio, sizeof(*bio));
+skip:
+ bio_put(bio);
+ unlock_page(page);
+ dec_io(page, last_write_complete);
+
+ return -EIO;
+}
+
+static int metapage_readpage(struct file *fp, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct bio *bio = NULL;
+ unsigned int block_offset;
+ unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+ sector_t page_start; /* address of page in fs blocks */
+ sector_t pblock;
+ unsigned int xlen;
+ unsigned int len;
+ unsigned int offset;
+
+ BUG_ON(!PageLocked(page));
+ page_start = (sector_t)page->index <<
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+ block_offset = 0;
+ while (block_offset < blocks_per_page) {
+ xlen = blocks_per_page - block_offset;
+ pblock = metapage_get_blocks(inode, page_start + block_offset,
+ &xlen);
+ if (pblock) {
+ if (!PagePrivate(page))
+ insert_metapage(page, NULL);
+ inc_io(page);
+ if (bio)
+ submit_bio(READ, bio);
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ bio->bi_bdev = inode->i_sb->s_bdev;
+ bio->bi_sector = pblock << (inode->i_blkbits - 9);
+ bio->bi_end_io = metapage_read_end_io;
+ bio->bi_private = page;
+ len = xlen << inode->i_blkbits;
+ offset = block_offset << inode->i_blkbits;
+ if (bio_add_page(bio, page, len, offset) < len)
+ goto add_failed;
+ block_offset += xlen;
+ } else
+ block_offset++;
}
+ if (bio)
+ submit_bio(READ, bio);
+ else
+ unlock_page(page);
+
+ return 0;
- if (mp->hash_next)
- mp->hash_next->hash_prev = mp->hash_prev;
+add_failed:
+ printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
+ bio_put(bio);
+ dec_io(page, last_read_complete);
+ return -EIO;
}
+static int metapage_releasepage(struct page *page, int gfp_mask)
+{
+ struct metapage *mp;
+ int busy = 0;
+ unsigned int offset;
+
+ for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ mp = page_to_mp(page, offset);
+
+ if (!mp)
+ continue;
+
+ jfs_info("metapage_releasepage: mp = 0x%p", mp);
+ if (mp->count || mp->nohomeok) {
+ jfs_info("count = %ld, nohomeok = %d", mp->count,
+ mp->nohomeok);
+ busy = 1;
+ continue;
+ }
+ wait_on_page_writeback(page);
+ //WARN_ON(test_bit(META_dirty, &mp->flag));
+ if (test_bit(META_dirty, &mp->flag)) {
+ dump_mem("dirty mp in metapage_releasepage", mp,
+ sizeof(struct metapage));
+ dump_mem("page", page, sizeof(struct page));
+ dump_stack();
+ }
+ WARN_ON(mp->lsn);
+ if (mp->lsn)
+ remove_from_logsync(mp);
+ remove_metapage(page, mp);
+ INCREMENT(mpStat.pagefree);
+ free_metapage(mp);
+ }
+ if (busy)
+ return -1;
+
+ return 0;
+}
+
+static int metapage_invalidatepage(struct page *page, unsigned long offset)
+{
+ BUG_ON(offset);
+
+ if (PageWriteback(page))
+ return 0;
+
+ return metapage_releasepage(page, 0);
+}
+
+struct address_space_operations jfs_metapage_aops = {
+ .readpage = metapage_readpage,
+ .writepage = metapage_writepage,
+ .sync_page = block_sync_page,
+ .releasepage = metapage_releasepage,
+ .invalidatepage = metapage_invalidatepage,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+};
+
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
unsigned int size, int absolute,
unsigned long new)
{
- struct metapage **hash_ptr;
int l2BlocksPerPage;
int l2bsize;
struct address_space *mapping;
- struct metapage *mp;
+ struct metapage *mp = NULL;
+ struct page *page;
unsigned long page_index;
unsigned long page_offset;
- jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);
-
+ jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
+ inode->i_ino, lblock, absolute);
+
+ l2bsize = inode->i_blkbits;
+ l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
+ page_index = lblock >> l2BlocksPerPage;
+ page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
+ if ((page_offset + size) > PAGE_CACHE_SIZE) {
+ jfs_err("MetaData crosses page boundary!!");
+ jfs_err("lblock = %lx, size = %d", lblock, size);
+ dump_stack();
+ return NULL;
+ }
if (absolute)
- mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
+ mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
else {
/*
* If an nfs client tries to read an inode that is larger
@@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
mapping = inode->i_mapping;
}
- hash_ptr = meta_hash(mapping, lblock);
-again:
- spin_lock(&meta_lock);
- mp = search_hash(hash_ptr, mapping, lblock);
+ if (new && (PSIZE == PAGE_CACHE_SIZE)) {
+ page = grab_cache_page(mapping, page_index);
+ if (!page) {
+ jfs_err("grab_cache_page failed!");
+ return NULL;
+ }
+ SetPageUptodate(page);
+ } else {
+ page = read_cache_page(mapping, page_index,
+ (filler_t *)mapping->a_ops->readpage, NULL);
+ if (IS_ERR(page)) {
+ jfs_err("read_cache_page failed!");
+ return NULL;
+ }
+ lock_page(page);
+ }
+
+ mp = page_to_mp(page, page_offset);
if (mp) {
- page_found:
- if (test_bit(META_stale, &mp->flag)) {
- spin_unlock(&meta_lock);
- msleep(1);
- goto again;
+ if (mp->logical_size != size) {
+ jfs_error(inode->i_sb,
+ "__get_metapage: mp->logical_size != size");
+ jfs_err("logical_size = %d, size = %d",
+ mp->logical_size, size);
+ dump_stack();
+ goto unlock;
}
mp->count++;
lock_metapage(mp);
- spin_unlock(&meta_lock);
if (test_bit(META_discard, &mp->flag)) {
if (!new) {
jfs_error(inode->i_sb,
"__get_metapage: using a "
"discarded metapage");
- release_metapage(mp);
- return NULL;
+ discard_metapage(mp);
+ goto unlock;
}
clear_bit(META_discard, &mp->flag);
}
- jfs_info("__get_metapage: found 0x%p, in hash", mp);
- if (mp->logical_size != size) {
- jfs_error(inode->i_sb,
- "__get_metapage: mp->logical_size != size");
- release_metapage(mp);
- return NULL;
- }
} else {
- l2bsize = inode->i_blkbits;
- l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
- page_index = lblock >> l2BlocksPerPage;
- page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
- l2bsize;
- if ((page_offset + size) > PAGE_CACHE_SIZE) {
- spin_unlock(&meta_lock);
- jfs_err("MetaData crosses page boundary!!");
- return NULL;
- }
-
- /*
- * Locks held on aggregate inode pages are usually
- * not held long, and they are taken in critical code
- * paths (committing dirty inodes, txCommit thread)
- *
- * Attempt to get metapage without blocking, tapping into
- * reserves if necessary.
- */
- mp = NULL;
- if (JFS_IP(inode)->fileset == AGGREGATE_I) {
- mp = alloc_metapage(GFP_ATOMIC);
- if (!mp) {
- /*
- * mempool is supposed to protect us from
- * failing here. We will try a blocking
- * call, but a deadlock is possible here
- */
- printk(KERN_WARNING
- "__get_metapage: atomic call to mempool_alloc failed.\n");
- printk(KERN_WARNING
- "Will attempt blocking call\n");
- }
- }
- if (!mp) {
- struct metapage *mp2;
-
- spin_unlock(&meta_lock);
- mp = alloc_metapage(GFP_NOFS);
- spin_lock(&meta_lock);
-
- /* we dropped the meta_lock, we need to search the
- * hash again.
- */
- mp2 = search_hash(hash_ptr, mapping, lblock);
- if (mp2) {
- free_metapage(mp);
- mp = mp2;
- goto page_found;
- }
- }
+ INCREMENT(mpStat.pagealloc);
+ mp = alloc_metapage(GFP_NOFS);
+ mp->page = page;
mp->flag = 0;
- lock_metapage(mp);
- if (absolute)
- set_bit(META_absolute, &mp->flag);
mp->xflag = COMMIT_PAGE;
mp->count = 1;
- atomic_set(&mp->nohomeok,0);
- mp->mapping = mapping;
- mp->index = lblock;
- mp->page = NULL;
+ mp->nohomeok = 0;
mp->logical_size = size;
- add_to_hash(mp, hash_ptr);
- spin_unlock(&meta_lock);
-
- if (new) {
- jfs_info("__get_metapage: Calling grab_cache_page");
- mp->page = grab_cache_page(mapping, page_index);
- if (!mp->page) {
- jfs_err("grab_cache_page failed!");
- goto freeit;
- } else {
- INCREMENT(mpStat.pagealloc);
- unlock_page(mp->page);
- }
- } else {
- jfs_info("__get_metapage: Calling read_cache_page");
- mp->page = read_cache_page(mapping, lblock,
- (filler_t *)mapping->a_ops->readpage, NULL);
- if (IS_ERR(mp->page)) {
- jfs_err("read_cache_page failed!");
- goto freeit;
- } else
- INCREMENT(mpStat.pagealloc);
+ mp->data = page_address(page) + page_offset;
+ mp->index = lblock;
+ if (unlikely(insert_metapage(page, mp))) {
+ free_metapage(mp);
+ goto unlock;
}
- mp->data = kmap(mp->page) + page_offset;
+ lock_metapage(mp);
}
- if (new)
+ if (new) {
+ jfs_info("zeroing mp = 0x%p", mp);
memset(mp->data, 0, PSIZE);
+ }
- jfs_info("__get_metapage: returning = 0x%p", mp);
+ unlock_page(page);
+ jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
return mp;
-freeit:
- spin_lock(&meta_lock);
- remove_from_hash(mp, hash_ptr);
- free_metapage(mp);
- spin_unlock(&meta_lock);
+unlock:
+ unlock_page(page);
return NULL;
}
-void hold_metapage(struct metapage * mp, int force)
+void grab_metapage(struct metapage * mp)
{
- spin_lock(&meta_lock);
-
+ jfs_info("grab_metapage: mp = 0x%p", mp);
+ page_cache_get(mp->page);
+ lock_page(mp->page);
mp->count++;
-
- if (force) {
- ASSERT (!(test_bit(META_forced, &mp->flag)));
- if (trylock_metapage(mp))
- set_bit(META_forced, &mp->flag);
- } else
- lock_metapage(mp);
-
- spin_unlock(&meta_lock);
+ lock_metapage(mp);
+ unlock_page(mp->page);
}
-static void __write_metapage(struct metapage * mp)
+void force_metapage(struct metapage *mp)
{
- int l2bsize = mp->mapping->host->i_blkbits;
- int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
- unsigned long page_index;
- unsigned long page_offset;
- int rc;
-
- jfs_info("__write_metapage: mp = 0x%p", mp);
-
- page_index = mp->page->index;
- page_offset =
- (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;
+ struct page *page = mp->page;
+ jfs_info("force_metapage: mp = 0x%p", mp);
+ set_bit(META_forcewrite, &mp->flag);
+ clear_bit(META_sync, &mp->flag);
+ page_cache_get(page);
+ lock_page(page);
+ set_page_dirty(page);
+ write_one_page(page, 1);
+ clear_bit(META_forcewrite, &mp->flag);
+ page_cache_release(page);
+}
+extern void hold_metapage(struct metapage *mp)
+{
lock_page(mp->page);
- rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
- page_offset +
- mp->logical_size);
- if (rc) {
- jfs_err("prepare_write return %d!", rc);
- ClearPageUptodate(mp->page);
+}
+
+extern void put_metapage(struct metapage *mp)
+{
+ if (mp->count || mp->nohomeok) {
+ /* Someone else will release this */
unlock_page(mp->page);
- clear_bit(META_dirty, &mp->flag);
return;
}
- rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
- page_offset +
- mp->logical_size);
- if (rc) {
- jfs_err("commit_write returned %d", rc);
- }
-
+ page_cache_get(mp->page);
+ mp->count++;
+ lock_metapage(mp);
unlock_page(mp->page);
- clear_bit(META_dirty, &mp->flag);
-
- jfs_info("__write_metapage done");
-}
-
-static inline void sync_metapage(struct metapage *mp)
-{
- struct page *page = mp->page;
-
- page_cache_get(page);
- lock_page(page);
-
- /* we're done with this page - no need to check for errors */
- if (page_has_buffers(page))
- write_one_page(page, 1);
- else
- unlock_page(page);
- page_cache_release(page);
+ release_metapage(mp);
}
void release_metapage(struct metapage * mp)
{
- struct jfs_log *log;
-
+ struct page *page = mp->page;
jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
- spin_lock(&meta_lock);
- if (test_bit(META_forced, &mp->flag)) {
- clear_bit(META_forced, &mp->flag);
- mp->count--;
- spin_unlock(&meta_lock);
- return;
- }
+ BUG_ON(!page);
+
+ lock_page(page);
+ unlock_metapage(mp);
assert(mp->count);
- if (--mp->count || atomic_read(&mp->nohomeok)) {
- unlock_metapage(mp);
- spin_unlock(&meta_lock);
+ if (--mp->count || mp->nohomeok) {
+ unlock_page(page);
+ page_cache_release(page);
return;
}
- if (mp->page) {
- set_bit(META_stale, &mp->flag);
- spin_unlock(&meta_lock);
- kunmap(mp->page);
- mp->data = NULL;
- if (test_bit(META_dirty, &mp->flag))
- __write_metapage(mp);
+ if (test_bit(META_dirty, &mp->flag)) {
+ set_page_dirty(page);
if (test_bit(META_sync, &mp->flag)) {
- sync_metapage(mp);
clear_bit(META_sync, &mp->flag);
+ write_one_page(page, 1);
+ lock_page(page); /* write_one_page unlocks the page */
}
+ } else if (mp->lsn) /* discard_metapage doesn't remove it */
+ remove_from_logsync(mp);
- if (test_bit(META_discard, &mp->flag)) {
- lock_page(mp->page);
- block_invalidatepage(mp->page, 0);
- unlock_page(mp->page);
- }
-
- page_cache_release(mp->page);
- mp->page = NULL;
- INCREMENT(mpStat.pagefree);
- spin_lock(&meta_lock);
- }
+#if MPS_PER_PAGE == 1
+ /*
+ * If we know this is the only thing in the page, we can throw
+ * the page out of the page cache. If pages are larger, we
+ * don't want to do this.
+ */
- if (mp->lsn) {
- /*
- * Remove metapage from logsynclist.
- */
- log = mp->log;
- LOGSYNC_LOCK(log);
- mp->log = NULL;
- mp->lsn = 0;
- mp->clsn = 0;
- log->count--;
- list_del(&mp->synclist);
- LOGSYNC_UNLOCK(log);
+ /* Retest mp->count since we may have released page lock */
+ if (test_bit(META_discard, &mp->flag) && !mp->count) {
+ clear_page_dirty(page);
+ ClearPageUptodate(page);
+#ifdef _NOT_YET
+ if (page->mapping) {
+ /* Remove from page cache and page cache reference */
+ remove_from_page_cache(page);
+ page_cache_release(page);
+ metapage_releasepage(page, 0);
+ }
+#endif
}
- remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
- spin_unlock(&meta_lock);
-
- free_metapage(mp);
+#else
+ /* Try to keep metapages from using up too much memory */
+ drop_metapage(page, mp);
+#endif
+ unlock_page(page);
+ page_cache_release(page);
}
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
- struct metapage **hash_ptr;
- unsigned long lblock;
+ sector_t lblock;
int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
+ int BlocksPerPage = 1 << l2BlocksPerPage;
/* All callers are interested in block device's mapping */
- struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
+ struct address_space *mapping =
+ JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
struct metapage *mp;
struct page *page;
+ unsigned int offset;
/*
- * First, mark metapages to discard. They will eventually be
+ * Mark metapages to discard. They will eventually be
* released, but should not be written.
*/
- for (lblock = addr; lblock < addr + len;
- lblock += 1 << l2BlocksPerPage) {
- hash_ptr = meta_hash(mapping, lblock);
-again:
- spin_lock(&meta_lock);
- mp = search_hash(hash_ptr, mapping, lblock);
- if (mp) {
- if (test_bit(META_stale, &mp->flag)) {
- spin_unlock(&meta_lock);
- msleep(1);
- goto again;
- }
+ for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
+ lblock += BlocksPerPage) {
+ page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
+ if (!page)
+ continue;
+ for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ mp = page_to_mp(page, offset);
+ if (!mp)
+ continue;
+ if (mp->index < addr)
+ continue;
+ if (mp->index >= addr + len)
+ break;
clear_bit(META_dirty, &mp->flag);
set_bit(META_discard, &mp->flag);
- spin_unlock(&meta_lock);
- } else {
- spin_unlock(&meta_lock);
- page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
- if (page) {
- block_invalidatepage(page, 0);
- unlock_page(page);
- page_cache_release(page);
- }
+ if (mp->lsn)
+ remove_from_logsync(mp);
}
+ unlock_page(page);
+ page_cache_release(page);
}
}
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 0e58aba58c3..991e9fb84c7 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -33,38 +33,27 @@ struct metapage {
unsigned long flag; /* See Below */
unsigned long count; /* Reference count */
void *data; /* Data pointer */
-
- /* list management stuff */
- struct metapage *hash_prev;
- struct metapage *hash_next; /* Also used for free list */
-
- /*
- * mapping & index become redundant, but we need these here to
- * add the metapage to the hash before we have the real page
- */
- struct address_space *mapping;
- unsigned long index;
+ sector_t index; /* block address of page */
wait_queue_head_t wait;
/* implementation */
struct page *page;
- unsigned long logical_size;
+ unsigned int logical_size;
/* Journal management */
int clsn;
- atomic_t nohomeok;
+ int nohomeok;
struct jfs_log *log;
};
/* metapage flag */
#define META_locked 0
-#define META_absolute 1
-#define META_free 2
-#define META_dirty 3
-#define META_sync 4
-#define META_discard 5
-#define META_forced 6
-#define META_stale 7
+#define META_free 1
+#define META_dirty 2
+#define META_sync 3
+#define META_discard 4
+#define META_forcewrite 5
+#define META_io 6
#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag)
@@ -80,7 +69,16 @@ extern struct metapage *__get_metapage(struct inode *inode,
__get_metapage(inode, lblock, size, absolute, TRUE)
extern void release_metapage(struct metapage *);
-extern void hold_metapage(struct metapage *, int);
+extern void grab_metapage(struct metapage *);
+extern void force_metapage(struct metapage *);
+
+/*
+ * hold_metapage and put_metapage are used in conjunction. The page lock
+ * is not dropped between the two, so no other threads can get or release
+ * the metapage
+ */
+extern void hold_metapage(struct metapage *);
+extern void put_metapage(struct metapage *);
static inline void write_metapage(struct metapage *mp)
{
@@ -101,6 +99,46 @@ static inline void discard_metapage(struct metapage *mp)
release_metapage(mp);
}
+static inline void metapage_nohomeok(struct metapage *mp)
+{
+ struct page *page = mp->page;
+ lock_page(page);
+ if (!mp->nohomeok++) {
+ mark_metapage_dirty(mp);
+ page_cache_get(page);
+ wait_on_page_writeback(page);
+ }
+ unlock_page(page);
+}
+
+/*
+ * This serializes access to mp->lsn when metapages are added to logsynclist
+ * without setting nohomeok. i.e. updating imap & dmap
+ */
+static inline void metapage_wait_for_io(struct metapage *mp)
+{
+ if (test_bit(META_io, &mp->flag))
+ wait_on_page_writeback(mp->page);
+}
+
+/*
+ * This is called when already holding the metapage
+ */
+static inline void _metapage_homeok(struct metapage *mp)
+{
+ if (!--mp->nohomeok)
+ page_cache_release(mp->page);
+}
+
+static inline void metapage_homeok(struct metapage *mp)
+{
+ hold_metapage(mp);
+ _metapage_homeok(mp);
+ put_metapage(mp);
+}
+
+extern struct address_space_operations jfs_metapage_aops;
+
/*
* This routines invalidate all pages for an extent.
*/
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index c535ffd638e..032d111bc33 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -285,11 +285,6 @@ int jfs_mount_rw(struct super_block *sb, int remount)
*/
logMOUNT(sb);
- /*
- * Set page cache allocation policy
- */
- mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS);
-
return rc;
}
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f40301d93f7..e93d01aa12c 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -227,6 +227,7 @@ static lid_t txLockAlloc(void)
static void txLockFree(lid_t lid)
{
+ TxLock[lid].tid = 0;
TxLock[lid].next = TxAnchor.freelock;
TxAnchor.freelock = lid;
TxAnchor.tlocksInUse--;
@@ -566,9 +567,6 @@ void txEnd(tid_t tid)
* synchronize with logsync barrier
*/
if (test_bit(log_SYNCBARRIER, &log->flag)) {
- /* forward log syncpt */
- /* lmSync(log); */
-
jfs_info("log barrier off: 0x%x", log->lsn);
/* enable new transactions start */
@@ -576,15 +574,22 @@ void txEnd(tid_t tid)
/* wakeup all waitors for logsync barrier */
TXN_WAKEUP(&log->syncwait);
+
+ TXN_UNLOCK();
+
+ /* forward log syncpt */
+ jfs_syncpt(log);
+
+ goto wakeup;
}
}
+ TXN_UNLOCK();
+wakeup:
/*
* wakeup all waitors for a free tblock
*/
TXN_WAKEUP(&TxAnchor.freewait);
-
- TXN_UNLOCK();
}
@@ -633,8 +638,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
/* is page locked by the requester transaction ? */
tlck = lid_to_tlock(lid);
- if ((xtid = tlck->tid) == tid)
+ if ((xtid = tlck->tid) == tid) {
+ TXN_UNLOCK();
goto grantLock;
+ }
/*
* is page locked by anonymous transaction/lock ?
@@ -649,6 +656,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
*/
if (xtid == 0) {
tlck->tid = tid;
+ TXN_UNLOCK();
tblk = tid_to_tblock(tid);
/*
* The order of the tlocks in the transaction is important
@@ -706,17 +714,18 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
*/
tlck->tid = tid;
+ TXN_UNLOCK();
+
/* mark tlock for meta-data page */
if (mp->xflag & COMMIT_PAGE) {
tlck->flag = tlckPAGELOCK;
/* mark the page dirty and nohomeok */
- mark_metapage_dirty(mp);
- atomic_inc(&mp->nohomeok);
+ metapage_nohomeok(mp);
jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
- mp, atomic_read(&mp->nohomeok), tid, tlck);
+ mp, mp->nohomeok, tid, tlck);
/* if anonymous transaction, and buffer is on the group
* commit synclist, mark inode to show this. This will
@@ -762,8 +771,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
if (tlck->next == 0) {
/* This inode's first anonymous transaction */
jfs_ip->atltail = lid;
+ TXN_LOCK();
list_add_tail(&jfs_ip->anon_inode_list,
&TxAnchor.anon_list);
+ TXN_UNLOCK();
}
}
@@ -821,8 +832,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
grantLock:
tlck->type |= type;
- TXN_UNLOCK();
-
return tlck;
/*
@@ -841,11 +850,19 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
BUG();
}
INCREMENT(stattx.waitlock); /* statistics */
+ TXN_UNLOCK();
release_metapage(mp);
+ TXN_LOCK();
+ xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */
jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
tid, xtid, lid);
- TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
+
+ /* Recheck everything since dropping TXN_LOCK */
+ if (xtid && (tlck->mp == mp) && (mp->lid == lid))
+ TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
+ else
+ TXN_UNLOCK();
jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);
return NULL;
@@ -906,6 +923,7 @@ static void txUnlock(struct tblock * tblk)
struct metapage *mp;
struct jfs_log *log;
int difft, diffp;
+ unsigned long flags;
jfs_info("txUnlock: tblk = 0x%p", tblk);
log = JFS_SBI(tblk->sb)->log;
@@ -925,19 +943,14 @@ static void txUnlock(struct tblock * tblk)
assert(mp->xflag & COMMIT_PAGE);
/* hold buffer
- *
- * It's possible that someone else has the metapage.
- * The only things were changing are nohomeok, which
- * is handled atomically, and clsn which is protected
- * by the LOGSYNC_LOCK.
*/
- hold_metapage(mp, 1);
+ hold_metapage(mp);
- assert(atomic_read(&mp->nohomeok) > 0);
- atomic_dec(&mp->nohomeok);
+ assert(mp->nohomeok > 0);
+ _metapage_homeok(mp);
/* inherit younger/larger clsn */
- LOGSYNC_LOCK(log);
+ LOGSYNC_LOCK(log, flags);
if (mp->clsn) {
logdiff(difft, tblk->clsn, log);
logdiff(diffp, mp->clsn, log);
@@ -945,16 +958,11 @@ static void txUnlock(struct tblock * tblk)
mp->clsn = tblk->clsn;
} else
mp->clsn = tblk->clsn;
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
assert(!(tlck->flag & tlckFREEPAGE));
- if (tlck->flag & tlckWRITEPAGE) {
- write_metapage(mp);
- } else {
- /* release page which has been forced */
- release_metapage(mp);
- }
+ put_metapage(mp);
}
/* insert tlock, and linelock(s) of the tlock if any,
@@ -981,10 +989,10 @@ static void txUnlock(struct tblock * tblk)
* has been inserted in logsync list at txUpdateMap())
*/
if (tblk->lsn) {
- LOGSYNC_LOCK(log);
+ LOGSYNC_LOCK(log, flags);
log->count--;
list_del(&tblk->synclist);
- LOGSYNC_UNLOCK(log);
+ LOGSYNC_UNLOCK(log, flags);
}
}
@@ -1573,8 +1581,8 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* the last entry, so don't bother logging this
*/
mp->lid = 0;
- hold_metapage(mp, 0);
- atomic_dec(&mp->nohomeok);
+ grab_metapage(mp);
+ metapage_homeok(mp);
discard_metapage(mp);
tlck->mp = NULL;
return 0;
@@ -1712,7 +1720,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
struct maplock *maplock;
struct xdlistlock *xadlock;
struct pxd_lock *pxdlock;
- pxd_t *pxd;
+ pxd_t *page_pxd;
int next, lwm, hwm;
ip = tlck->ip;
@@ -1722,7 +1730,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
- pxd = &lrd->log.redopage.pxd;
+ page_pxd = &lrd->log.redopage.pxd;
if (tlck->type & tlckBTROOT) {
lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
@@ -1752,9 +1760,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* applying the after-image to the meta-data page.
*/
lrd->type = cpu_to_le16(LOG_REDOPAGE);
-// *pxd = mp->cm_pxd;
- PXDaddress(pxd, mp->index);
- PXDlength(pxd,
+// *page_pxd = mp->cm_pxd;
+ PXDaddress(page_pxd, mp->index);
+ PXDlength(page_pxd,
mp->logical_size >> tblk->sb->s_blocksize_bits);
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
@@ -1776,25 +1784,31 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
tlck->flag |= tlckUPDATEMAP;
xadlock->flag = mlckALLOCXADLIST;
xadlock->count = next - lwm;
- if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
+ if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
int i;
+ pxd_t *pxd;
/*
* Lazy commit may allow xtree to be modified before
* txUpdateMap runs. Copy xad into linelock to
* preserve correct data.
+ *
+ * We can fit twice as many pxd's as xads in the lock
*/
- xadlock->xdlist = &xtlck->pxdlock;
- memcpy(xadlock->xdlist, &p->xad[lwm],
- sizeof(xad_t) * xadlock->count);
-
- for (i = 0; i < xadlock->count; i++)
+ xadlock->flag = mlckALLOCPXDLIST;
+ pxd = xadlock->xdlist = &xtlck->pxdlock;
+ for (i = 0; i < xadlock->count; i++) {
+ PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
+ PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
p->xad[lwm + i].flag &=
~(XAD_NEW | XAD_EXTENDED);
+ pxd++;
+ }
} else {
/*
* xdlist will point to into inode's xtree, ensure
* that transaction is not committed lazily.
*/
+ xadlock->flag = mlckALLOCXADLIST;
xadlock->xdlist = &p->xad[lwm];
tblk->xflag &= ~COMMIT_LAZY;
}
@@ -1836,8 +1850,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
if (tblk->xflag & COMMIT_TRUNCATE) {
/* write NOREDOPAGE for the page */
lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
- PXDaddress(pxd, mp->index);
- PXDlength(pxd,
+ PXDaddress(page_pxd, mp->index);
+ PXDlength(page_pxd,
mp->logical_size >> tblk->sb->
s_blocksize_bits);
lrd->backchain =
@@ -1872,22 +1886,32 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* deleted page itself;
*/
tlck->flag |= tlckUPDATEMAP;
- xadlock->flag = mlckFREEXADLIST;
xadlock->count = hwm - XTENTRYSTART + 1;
- if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
+ if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
+ int i;
+ pxd_t *pxd;
/*
* Lazy commit may allow xtree to be modified before
* txUpdateMap runs. Copy xad into linelock to
* preserve correct data.
+ *
+ * We can fit twice as many pxd's as xads in the lock
*/
- xadlock->xdlist = &xtlck->pxdlock;
- memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART],
- sizeof(xad_t) * xadlock->count);
+ xadlock->flag = mlckFREEPXDLIST;
+ pxd = xadlock->xdlist = &xtlck->pxdlock;
+ for (i = 0; i < xadlock->count; i++) {
+ PXDaddress(pxd,
+ addressXAD(&p->xad[XTENTRYSTART + i]));
+ PXDlength(pxd,
+ lengthXAD(&p->xad[XTENTRYSTART + i]));
+ pxd++;
+ }
} else {
/*
* xdlist will point to into inode's xtree, ensure
* that transaction is not committed lazily.
*/
+ xadlock->flag = mlckFREEXADLIST;
xadlock->xdlist = &p->xad[XTENTRYSTART];
tblk->xflag &= ~COMMIT_LAZY;
}
@@ -1918,7 +1942,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* header ?
*/
if (tlck->type & tlckTRUNCATE) {
- pxd_t tpxd; /* truncated extent of xad */
+ pxd_t pxd; /* truncated extent of xad */
int twm;
/*
@@ -1947,8 +1971,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
* applying the after-image to the meta-data page.
*/
lrd->type = cpu_to_le16(LOG_REDOPAGE);
- PXDaddress(pxd, mp->index);
- PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
+ PXDaddress(page_pxd, mp->index);
+ PXDlength(page_pxd,
+ mp->logical_size >> tblk->sb->s_blocksize_bits);
lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
/*
@@ -1966,7 +1991,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
lrd->log.updatemap.nxd = cpu_to_le16(1);
lrd->log.updatemap.pxd = pxdlock->pxd;
- tpxd = pxdlock->pxd; /* save to format maplock */
+ pxd = pxdlock->pxd; /* save to format maplock */
lrd->backchain =
cpu_to_le32(lmLog(log, tblk, lrd, NULL));
}
@@ -2035,7 +2060,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
pxdlock = (struct pxd_lock *) xadlock;
pxdlock->flag = mlckFREEPXD;
pxdlock->count = 1;
- pxdlock->pxd = tpxd;
+ pxdlock->pxd = pxd;
jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
"hwm:%d", ip, mp, pxdlock->count, hwm);
@@ -2253,7 +2278,8 @@ void txForce(struct tblock * tblk)
tlck->flag &= ~tlckWRITEPAGE;
/* do not release page to freelist */
-
+ force_metapage(mp);
+#if 0
/*
* The "right" thing to do here is to
* synchronously write the metadata.
@@ -2265,9 +2291,10 @@ void txForce(struct tblock * tblk)
* we can get by with synchronously writing
* the pages when they are released.
*/
- assert(atomic_read(&mp->nohomeok));
+ assert(mp->nohomeok);
set_bit(META_dirty, &mp->flag);
set_bit(META_sync, &mp->flag);
+#endif
}
}
}
@@ -2327,7 +2354,7 @@ static void txUpdateMap(struct tblock * tblk)
*/
mp = tlck->mp;
ASSERT(mp->xflag & COMMIT_PAGE);
- hold_metapage(mp, 0);
+ grab_metapage(mp);
}
/*
@@ -2377,8 +2404,8 @@ static void txUpdateMap(struct tblock * tblk)
ASSERT(mp->lid == lid);
tlck->mp->lid = 0;
}
- assert(atomic_read(&mp->nohomeok) == 1);
- atomic_dec(&mp->nohomeok);
+ assert(mp->nohomeok == 1);
+ metapage_homeok(mp);
discard_metapage(mp);
tlck->mp = NULL;
}
@@ -2844,24 +2871,9 @@ static void LogSyncRelease(struct metapage * mp)
{
struct jfs_log *log = mp->log;
- assert(atomic_read(&mp->nohomeok));
+ assert(mp->nohomeok);
assert(log);
- atomic_dec(&mp->nohomeok);
-
- if (atomic_read(&mp->nohomeok))
- return;
-
- hold_metapage(mp, 0);
-
- LOGSYNC_LOCK(log);
- mp->log = NULL;
- mp->lsn = 0;
- mp->clsn = 0;
- log->count--;
- list_del_init(&mp->synclist);
- LOGSYNC_UNLOCK(log);
-
- release_metapage(mp);
+ metapage_homeok(mp);
}
/*
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index f31a9e3f3fe..5cf91785b54 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -49,7 +49,6 @@
*/
int jfs_umount(struct super_block *sb)
{
- struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct inode *ipbmap = sbi->ipbmap;
struct inode *ipimap = sbi->ipimap;
@@ -109,8 +108,8 @@ int jfs_umount(struct super_block *sb)
* Make sure all metadata makes it to disk before we mark
* the superblock as clean
*/
- filemap_fdatawrite(bdev_mapping);
- filemap_fdatawait(bdev_mapping);
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
+ filemap_fdatawait(sbi->direct_inode->i_mapping);
/*
* ensure all file system file pages are propagated to their
@@ -123,9 +122,6 @@ int jfs_umount(struct super_block *sb)
if (log) { /* log = NULL if read-only mount */
updateSuper(sb, FM_CLEAN);
- /* Restore default gfp_mask for bdev */
- mapping_set_gfp_mask(bdev_mapping, GFP_USER);
-
/*
* close log:
*
@@ -140,7 +136,6 @@ int jfs_umount(struct super_block *sb)
int jfs_umount_rw(struct super_block *sb)
{
- struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
@@ -166,13 +161,10 @@ int jfs_umount_rw(struct super_block *sb)
* mark the superblock clean before everything is flushed to
* disk.
*/
- filemap_fdatawrite(bdev_mapping);
- filemap_fdatawait(bdev_mapping);
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
+ filemap_fdatawait(sbi->direct_inode->i_mapping);
updateSuper(sb, FM_CLEAN);
- /* Restore default gfp_mask for bdev */
- mapping_set_gfp_mask(bdev_mapping, GFP_USER);
-
return lmLogClose(sb);
}
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 11c58c54b81..31b34db4519 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) International Business Machines Corp., 2000-2004
+ * Copyright (C) International Business Machines Corp., 2000-2005
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -111,8 +111,8 @@ static struct {
/*
* forward references
*/
-static int xtSearch(struct inode *ip,
- s64 xoff, int *cmpp, struct btstack * btstack, int flag);
+static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
+ struct btstack * btstack, int flag);
static int xtSplitUp(tid_t tid,
struct inode *ip,
@@ -159,11 +159,12 @@ int xtLookup(struct inode *ip, s64 lstart,
xtpage_t *p;
int index;
xad_t *xad;
- s64 size, xoff, xend;
+ s64 next, size, xoff, xend;
int xlen;
s64 xaddr;
- *plen = 0;
+ *paddr = 0;
+ *plen = llen;
if (!no_check) {
/* is lookup offset beyond eof ? */
@@ -180,7 +181,7 @@ int xtLookup(struct inode *ip, s64 lstart,
* search for the xad entry covering the logical extent
*/
//search:
- if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) {
+ if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
jfs_err("xtLookup: xtSearch returned %d", rc);
return rc;
}
@@ -198,8 +199,11 @@ int xtLookup(struct inode *ip, s64 lstart,
* lstart is a page start address,
* i.e., lstart cannot start in a hole;
*/
- if (cmp)
+ if (cmp) {
+ if (next)
+ *plen = min(next - lstart, llen);
goto out;
+ }
/*
* lxd covered by xad
@@ -284,7 +288,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
if (lstart >= size)
return 0;
- if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0)))
+ if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
return rc;
/*
@@ -488,6 +492,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
* parameters:
* ip - file object;
* xoff - extent offset;
+ * nextp - address of next extent (if any) for search miss
* cmpp - comparison result:
* btstack - traverse stack;
* flag - search process flag (XT_INSERT);
@@ -497,7 +502,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
* *cmpp is set to result of comparison with the entry returned.
* the page containing the entry is pinned at exit.
*/
-static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
+static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
int *cmpp, struct btstack * btstack, int flag)
{
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
@@ -511,6 +516,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
struct btframe *btsp;
int nsplit = 0; /* number of pages to split */
s64 t64;
+ s64 next = 0;
INCREMENT(xtStat.search);
@@ -579,6 +585,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
* previous and this entry
*/
*cmpp = 1;
+ next = t64;
goto out;
}
@@ -623,6 +630,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
/* update sequential access heuristics */
jfs_ip->btindex = index;
+ if (nextp)
+ *nextp = next;
+
INCREMENT(xtStat.fastSearch);
return 0;
}
@@ -675,10 +685,11 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
return 0;
}
-
/* search hit - internal page:
* descend/search its child page
*/
+ if (index < le16_to_cpu(p->header.nextindex)-1)
+ next = offsetXAD(&p->xad[index + 1]);
goto next;
}
@@ -694,6 +705,8 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
* base is the smallest index with key (Kj) greater than
* search key (K) and may be zero or maxentry index.
*/
+ if (base < le16_to_cpu(p->header.nextindex))
+ next = offsetXAD(&p->xad[base]);
/*
* search miss - leaf page:
*
@@ -727,6 +740,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
jfs_ip->btorder = BT_RANDOM;
jfs_ip->btindex = base;
+ if (nextp)
+ *nextp = next;
+
return 0;
}
@@ -793,6 +809,7 @@ int xtInsert(tid_t tid, /* transaction id */
struct xtsplit split; /* split information */
xad_t *xad;
int cmp;
+ s64 next;
struct tlock *tlck;
struct xtlock *xtlck;
@@ -806,7 +823,7 @@ int xtInsert(tid_t tid, /* transaction id */
* n.b. xtSearch() may return index of maxentry of
* the full page.
*/
- if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+ if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
return rc;
/* retrieve search result */
@@ -814,7 +831,7 @@ int xtInsert(tid_t tid, /* transaction id */
/* This test must follow XT_GETSEARCH since mp must be valid if
* we branch to out: */
- if (cmp == 0) {
+ if ((cmp == 0) || (next && (xlen > next - xoff))) {
rc = -EEXIST;
goto out;
}
@@ -1626,7 +1643,7 @@ int xtExtend(tid_t tid, /* transaction id */
jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
/* there must exist extent to be extended */
- if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT)))
+ if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
return rc;
/* retrieve search result */
@@ -1794,7 +1811,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
*/
/* there must exist extent to be tailgated */
- if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+ if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
return rc;
/* retrieve search result */
@@ -1977,7 +1994,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
nxlen = lengthXAD(nxad);
nxaddr = addressXAD(nxad);
- if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
+ if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
return rc;
/* retrieve search result */
@@ -2291,7 +2308,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
if (nextindex == le16_to_cpu(p->header.maxentry)) {
XT_PUTPAGE(mp);
- if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
+ if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
return rc;
/* retrieve search result */
@@ -2438,6 +2455,7 @@ int xtAppend(tid_t tid, /* transaction id */
int nsplit, nblocks, xlen;
struct pxdlist pxdlist;
pxd_t *pxd;
+ s64 next;
xaddr = *xaddrp;
xlen = *xlenp;
@@ -2452,7 +2470,7 @@ int xtAppend(tid_t tid, /* transaction id */
* n.b. xtSearch() may return index of maxentry of
* the full page.
*/
- if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+ if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
return rc;
/* retrieve search result */
@@ -2462,6 +2480,9 @@ int xtAppend(tid_t tid, /* transaction id */
rc = -EEXIST;
goto out;
}
+
+ if (next)
+ xlen = min(xlen, (int)(next - xoff));
//insert:
/*
* insert entry for new extent
@@ -2600,7 +2621,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
/*
* find the matching entry; xtSearch() pins the page
*/
- if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
+ if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
return rc;
XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
@@ -2852,7 +2873,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
*/
if (xtype == DATAEXT) {
/* search in leaf entry */
- rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
+ rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
if (rc)
return rc;
@@ -2958,7 +2979,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
}
/* get back parent page */
- if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
+ if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
return rc;
XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
@@ -3991,7 +4012,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
if (committed_size) {
xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
- rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
+ rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
if (rc)
return rc;
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 2eb6869b6e7..c6dc254d325 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -209,6 +209,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
*/
txQuiesce(sb);
+ /* Reset size of direct inode */
+ sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size;
+
if (sbi->mntflag & JFS_INLINELOG) {
/*
* deactivate old inline log
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5856866e24f..5e774ed7fb6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -210,6 +210,10 @@ static void jfs_put_super(struct super_block *sb)
unload_nls(sbi->nls_tab);
sbi->nls_tab = NULL;
+ truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
+ iput(sbi->direct_inode);
+ sbi->direct_inode = NULL;
+
kfree(sbi);
}
@@ -358,6 +362,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
}
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+ /*
+ * Invalidate any previously read metadata. fsck may have
+ * changed the on-disk data since we mounted r/o
+ */
+ truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
+
JFS_SBI(sb)->flag = flag;
return jfs_mount_rw(sb, 1);
}
@@ -428,12 +438,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &jfs_super_operations;
sb->s_export_op = &jfs_export_operations;
+ /*
+ * Initialize direct-mapping inode/address-space
+ */
+ inode = new_inode(sb);
+ if (inode == NULL)
+ goto out_kfree;
+ inode->i_ino = 0;
+ inode->i_nlink = 1;
+ inode->i_size = sb->s_bdev->bd_inode->i_size;
+ inode->i_mapping->a_ops = &jfs_metapage_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+ sbi->direct_inode = inode;
+
rc = jfs_mount(sb);
if (rc) {
if (!silent) {
jfs_err("jfs_mount failed w/return code = %d", rc);
}
- goto out_kfree;
+ goto out_mount_failed;
}
if (sb->s_flags & MS_RDONLY)
sbi->log = NULL;
@@ -482,6 +506,13 @@ out_no_rw:
if (rc) {
jfs_err("jfs_umount failed with return code %d", rc);
}
+out_mount_failed:
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
+ filemap_fdatawait(sbi->direct_inode->i_mapping);
+ truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
+ make_bad_inode(sbi->direct_inode);
+ iput(sbi->direct_inode);
+ sbi->direct_inode = NULL;
out_kfree:
if (sbi->nls_tab)
unload_nls(sbi->nls_tab);
@@ -527,8 +558,10 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
struct jfs_log *log = JFS_SBI(sb)->log;
/* log == NULL indicates read-only mount */
- if (log)
+ if (log) {
jfs_flush_journal(log, wait);
+ jfs_syncpt(log);
+ }
return 0;
}