From 07c8f67587724b417f60bffb32c448dd94647b54 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 16:11:59 +1100 Subject: [XFS] Make use of the init-once slab optimisation. To avoid having to initialise some fields of the XFS inode on every allocation, we can use the slab init-once feature to initialise them. All we have to guarantee is that when we free the inode, all it's entries are in the initial state. Add asserts where possible to ensure debug kernels check this initial state before freeing and after allocation. SGI-PV: 981498 SGI-Modid: xfs-linux-melb:xfs-kern:31925a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 37 ++++++++++++++- fs/xfs/xfs_iget.c | 15 ------ fs/xfs/xfs_inode.c | 111 +++++++++++++++++++++++++++++-------------- fs/xfs/xfs_inode.h | 1 + fs/xfs/xfs_itable.c | 14 +++--- 5 files changed, 119 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 37ebe36056e..d1c4dec51a3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -887,6 +887,41 @@ xfs_fs_inode_init_once( inode_init_once((struct inode *)vnode); } + +/* + * Slab object creation initialisation for the XFS inode. + * This covers only the idempotent fields in the XFS inode; + * all other fields need to be initialised on allocation + * from the slab. This avoids the need to repeatedly intialise + * fields in the xfs inode that left in the initialise state + * when freeing the inode. + */ +void +xfs_inode_init_once( + kmem_zone_t *zone, + void *inode) +{ + struct xfs_inode *ip = inode; + + memset(ip, 0, sizeof(struct xfs_inode)); + atomic_set(&ip->i_iocount, 0); + atomic_set(&ip->i_pincount, 0); + spin_lock_init(&ip->i_flags_lock); + INIT_LIST_HEAD(&ip->i_reclaim); + init_waitqueue_head(&ip->i_ipin_wait); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); +} + /* * Attempt to flush the inode, this will actually fail * if the inode is pinned, but we dirty the inode again @@ -2018,7 +2053,7 @@ xfs_init_zones(void) xfs_inode_zone = kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, NULL); + KM_ZONE_SPREAD, xfs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index e229e9e001c..5be89d760a9 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -210,21 +210,6 @@ finish_inode: xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - - mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - init_waitqueue_head(&ip->i_ipin_wait); - atomic_set(&ip->i_pincount, 0); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&ip->i_flush); - complete(&ip->i_flush); - if (lock_flags) xfs_ilock(ip, lock_flags); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dbd9cef852e..b0c604e1bd4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -787,6 +787,70 @@ xfs_dic2xflags( (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); } +/* + * Allocate and initialise an xfs_inode. + */ +struct xfs_inode * +xfs_inode_alloc( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + + /* + * if this didn't occur in transactions, we could use + * KM_MAYFAIL and return NULL here on ENOMEM. Set the + * code up to do this anyway. + */ + ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); + if (!ip) + return NULL; + + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(list_empty(&ip->i_reclaim)); + + ip->i_ino = ino; + ip->i_mount = mp; + ip->i_blkno = 0; + ip->i_len = 0; + ip->i_boffset =0; + ip->i_afp = NULL; + memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); + ip->i_flags = 0; + ip->i_update_core = 0; + ip->i_update_size = 0; + ip->i_delayed_blks = 0; + memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); + ip->i_size = 0; + ip->i_new_size = 0; + + /* + * Initialize inode's trace buffers. + */ +#ifdef XFS_INODE_TRACE + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BMAP_TRACE + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BMBT_TRACE + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_RW_TRACE + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_ILOCK_TRACE + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_DIR2_TRACE + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); +#endif + + return ip; +} + /* * Given a mount structure and an inode number, return a pointer * to a newly allocated in-core inode corresponding to the given @@ -809,13 +873,9 @@ xfs_iread( xfs_inode_t *ip; int error; - ASSERT(xfs_inode_zone != NULL); - - ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); - ip->i_ino = ino; - ip->i_mount = mp; - atomic_set(&ip->i_iocount, 0); - spin_lock_init(&ip->i_flags_lock); + ip = xfs_inode_alloc(mp, ino); + if (!ip) + return ENOMEM; /* * Get pointer's to the on-disk inode and the buffer containing it. @@ -830,35 +890,12 @@ xfs_iread( return error; } - /* - * Initialize inode's trace buffers. - * Do this before xfs_iformat in case it adds entries. - */ -#ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMBT_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); -#endif - /* * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " @@ -881,7 +918,7 @@ xfs_iread( xfs_dinode_from_disk(&ip->i_d, &dip->di_core); error = xfs_iformat(ip, dip); if (error) { - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " @@ -911,8 +948,6 @@ xfs_iread( XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); } - INIT_LIST_HEAD(&ip->i_reclaim); - /* * The inode format changed when we moved the link count and * made it 32 bits long. If this is an old format inode, @@ -2631,8 +2666,6 @@ xfs_idestroy( } if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); - mrfree(&ip->i_lock); - mrfree(&ip->i_iolock); #ifdef XFS_INODE_TRACE ktrace_free(ip->i_trace); @@ -2671,7 +2704,13 @@ xfs_idestroy( spin_unlock(&mp->m_ail_lock); } xfs_inode_item_destroy(ip); + ip->i_itemp = NULL; } + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(list_empty(&ip->i_reclaim)); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1420c49674d..3af1f6dd149 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -513,6 +513,7 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); +struct xfs_inode * xfs_inode_alloc(struct xfs_mount *, xfs_ino_t); void xfs_idestroy_fork(xfs_inode_t *, int); void xfs_idestroy(xfs_inode_t *); void xfs_idata_realloc(xfs_inode_t *, int, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index cf6754a3c5b..4f4c9394106 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -594,21 +594,21 @@ xfs_bulkstat( /* * Get the inode cluster buffer */ - ASSERT(xfs_inode_zone != NULL); - ip = kmem_zone_zalloc(xfs_inode_zone, - KM_SLEEP); - ip->i_ino = ino; - ip->i_mount = mp; - spin_lock_init(&ip->i_flags_lock); if (bp) xfs_buf_relse(bp); + ip = xfs_inode_alloc(mp, ino); + if (!ip) { + bp = NULL; + rval = ENOMEM; + break; + } error = xfs_itobp(mp, NULL, ip, &dip, &bp, bno, XFS_IMAP_BULKSTAT, XFS_BUF_LOCK); if (!error) clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); if (XFS_TEST_ERROR(error != 0, mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, XFS_RANDOM_BULKSTAT_READ_CHUNK)) { -- cgit v1.2.3 From be8b78a626dd9bc92c12e9ac34f3bc3db1204d25 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:42:34 +1100 Subject: [XFS] Remove kmem_zone_t argument from xfs_inode_init_once() kmem cache constructor no longer takes a kmem_zone_t argument. SGI-PV: 957103 SGI-Modid: xfs-linux-melb:xfs-kern:32254a Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index d1c4dec51a3..9bfb26066a8 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -898,7 +898,6 @@ xfs_fs_inode_init_once( */ void xfs_inode_init_once( - kmem_zone_t *zone, void *inode) { struct xfs_inode *ip = inode; -- cgit v1.2.3 From d07c60e54fb7647d8247ae392f128e8ee8f3e5f3 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:50:35 +1100 Subject: [XFS] Use xfs_idestroy() to cleanup an inode. SGI-PV: 981498 SGI-Modid: xfs-linux-melb:xfs-kern:31927a Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b0c604e1bd4..2a158a26286 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -886,7 +886,7 @@ xfs_iread( */ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); if (error) { - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); return error; } -- cgit v1.2.3 From 46039928c9abe466ed1bc0da20c2e596b1d41236 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 16:52:35 +1100 Subject: [XFS] Remove final remnants of dirv1 macros and other stuff SGI-PV: 981498 SGI-Modid: xfs-linux-melb:xfs-kern:32002a Signed-off-by: Barry Naujok Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_da_btree.h | 20 -------------------- fs/xfs/xfs_mount.h | 1 - 2 files changed, 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 8be0b00ede9..599e270e695 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -72,27 +72,7 @@ typedef struct xfs_da_intnode { typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; typedef struct xfs_da_node_entry xfs_da_node_entry_t; -#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */ - #define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize -#define XFS_LBLOG(mp) (mp)->m_sb.sb_blocklog - -#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \ - (((bno) << (mp)->m_dircook_elog) | (entry)) -#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \ - (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash)) -#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)cookie) -#define XFS_DA_COOKIE_BNO(mp,cookie) \ - ((((xfs_off_t)(cookie) >> 31) == -1LL ? \ - (xfs_dablk_t)0 : \ - (xfs_dablk_t)((xfs_off_t)(cookie) >> \ - ((mp)->m_dircook_elog + 32)))) -#define XFS_DA_COOKIE_ENTRY(mp,cookie) \ - ((((xfs_off_t)(cookie) >> 31) == -1LL ? \ - (xfs_dablk_t)0 : \ - (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \ - ((1 << (mp)->m_dircook_elog) - 1)))) - /*======================================================================== * Btree searching and modification structure definitions. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f3c1024b124..49d647e730e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -267,7 +267,6 @@ typedef struct xfs_mount { xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ - __uint8_t m_dircook_elog; /* log d-cookie entry bits */ __uint8_t m_blkbit_log; /* blocklog + NBBY */ __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ __uint8_t m_agno_log; /* log #ag's */ -- cgit v1.2.3 From a357a1215602f79182abdde27aaddc7166dbd709 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:53:25 +1100 Subject: [XFS] Fix use-after-free with log and quotas Destroying the quota stuff on unmount can access the log - ie XFS_QM_DONE() ends up in xfs_dqunlock() which calls xfs_trans_unlocked_item() and then xfs_log_move_tail(). By this time the log has already been destroyed. Just move the cleanup of the quota code earlier in xfs_unmountfs() before the call to xfs_log_unmount(). Moving XFS_QM_DONE() up near XFS_QM_DQPURGEALL() seems like a good spot. SGI-PV: 987086 SGI-Modid: xfs-linux-melb:xfs-kern:32148a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig Signed-off-by: Peter Leckie --- fs/xfs/xfs_mount.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a4503f5e949..15f5dd22fbb 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1245,6 +1245,9 @@ xfs_unmountfs( XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); + /* * Flush out the log synchronously so that we know for sure * that nothing is pinned. This is important because bflush() @@ -1297,8 +1300,6 @@ xfs_unmountfs( xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); } STATIC void -- cgit v1.2.3 From f338f9036400e453ab553b16639a9cc838b02d44 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:53:38 +1100 Subject: [XFS] Unlock inode before calling xfs_idestroy() Lock debugging reported the ilock was being destroyed without being unlocked. We don't need to lock the inode until we are going to insert it into the radix tree. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32159a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_iget.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5be89d760a9..4c92d190b3b 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -210,9 +210,6 @@ finish_inode: xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - if (lock_flags) - xfs_ilock(ip, lock_flags); - if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { xfs_idestroy(ip); xfs_put_perag(mp, pag); @@ -228,6 +225,10 @@ finish_inode: delay(1); goto again; } + + if (lock_flags) + xfs_ilock(ip, lock_flags); + mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); @@ -239,6 +240,8 @@ finish_inode: BUG_ON(error != -EEXIST); write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); + if (lock_flags) + xfs_iunlock(ip, lock_flags); xfs_idestroy(ip); XFS_STATS_INC(xs_ig_dup); goto again; -- cgit v1.2.3 From f2277f06e626d694e61bb356524ff536ced24acf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:53:47 +1100 Subject: [XFS] kill struct xfs_btree_hdr This type is only embedded in struct xfs_btree_block and never used directly. By moving the fields directly into struct xfs_btree_block a lot of the macros for struct xfs_btree_sblock and struct xfs_btree_lblock can be used for struct xfs_btree_block too now which helps greatly with some of the migrations during implementing the generic btree code. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32174a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_btree.c | 12 ++++++------ fs/xfs/xfs_btree.h | 7 +------ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index cc593a84c34..31002093bfb 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -62,13 +62,13 @@ xfs_btree_maxrecs( case XFS_BTNUM_BNO: case XFS_BTNUM_CNT: return (int)XFS_ALLOC_BLOCK_MAXRECS( - be16_to_cpu(block->bb_h.bb_level), cur); + be16_to_cpu(block->bb_level), cur); case XFS_BTNUM_BMAP: return (int)XFS_BMAP_BLOCK_IMAXRECS( - be16_to_cpu(block->bb_h.bb_level), cur); + be16_to_cpu(block->bb_level), cur); case XFS_BTNUM_INO: return (int)XFS_INOBT_BLOCK_MAXRECS( - be16_to_cpu(block->bb_h.bb_level), cur); + be16_to_cpu(block->bb_level), cur); default: ASSERT(0); return 0; @@ -634,7 +634,7 @@ xfs_btree_firstrec( /* * It's empty, there is no such record. */ - if (!block->bb_h.bb_numrecs) + if (!block->bb_numrecs) return 0; /* * Set the ptr value to 1, that's the first record/key. @@ -663,12 +663,12 @@ xfs_btree_lastrec( /* * It's empty, there is no such record. */ - if (!block->bb_h.bb_numrecs) + if (!block->bb_numrecs) return 0; /* * Set the ptr value to numrecs, that's the last record/key. */ - cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs); + cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); return 1; } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 1f528a2a375..332b9f1da20 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -63,15 +63,10 @@ typedef struct xfs_btree_lblock { /* * Combined header and structure, used by common code. */ -typedef struct xfs_btree_hdr -{ +typedef struct xfs_btree_block { __be32 bb_magic; /* magic number for block type */ __be16 bb_level; /* 0 is a leaf */ __be16 bb_numrecs; /* current # of data records */ -} xfs_btree_hdr_t; - -typedef struct xfs_btree_block { - xfs_btree_hdr_t bb_h; /* header */ union { struct { __be32 bb_leftsib; -- cgit v1.2.3 From 561f7d17390d00444e6cd0b02b7516c91528082e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:53:59 +1100 Subject: [XFS] split up xfs_btree_init_cursor xfs_btree_init_cursor contains close to little shared code for the different btrees and will get even more non-common code in the future. Split it up into one routine per btree type. Because xfs_btree_dup_cursor needs to call the init routine for a generic btree cursor add a new btree operation vector that contains a dup_cursor method that initializes a new cursor based on an existing one. The btree operations vector is based on an idea and code from Dave Chinner and will grow more entries later during this series. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32176a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 34 +++++++-------- fs/xfs/xfs_alloc_btree.c | 45 +++++++++++++++++++ fs/xfs/xfs_alloc_btree.h | 5 +++ fs/xfs/xfs_bmap.c | 17 +++----- fs/xfs/xfs_bmap_btree.c | 59 +++++++++++++++++++++++++ fs/xfs/xfs_bmap_btree.h | 4 ++ fs/xfs/xfs_btree.c | 107 ++-------------------------------------------- fs/xfs/xfs_btree.h | 20 +++------ fs/xfs/xfs_ialloc.c | 12 ++---- fs/xfs/xfs_ialloc_btree.c | 41 ++++++++++++++++++ fs/xfs/xfs_ialloc_btree.h | 4 ++ fs/xfs/xfs_itable.c | 6 +-- 12 files changed, 196 insertions(+), 158 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 1956f83489f..69833eb1de4 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -640,8 +640,8 @@ xfs_alloc_ag_vextent_exact( /* * Allocate/initialize a cursor for the by-number freespace btree. */ - bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). * Look for the closest free block <= bno, it must contain bno @@ -696,8 +696,8 @@ xfs_alloc_ag_vextent_exact( * We are allocating agbno for rlen [agbno .. end] * Allocate/initialize a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); ASSERT(args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, @@ -759,8 +759,8 @@ xfs_alloc_ag_vextent_near( /* * Get a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); ltlen = 0; bno_cur_lt = bno_cur_gt = NULL; /* @@ -886,8 +886,8 @@ xfs_alloc_ag_vextent_near( /* * Set up a cursor for the by-bno tree. */ - bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO); /* * Fix up the btree entries. */ @@ -914,8 +914,8 @@ xfs_alloc_ag_vextent_near( /* * Allocate and initialize the cursor for the leftward search. */ - bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); /* * Lookup <= bno to find the leftward search's starting point. */ @@ -1267,8 +1267,8 @@ xfs_alloc_ag_vextent_size( /* * Allocate and initialize a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); bno_cur = NULL; /* * Look for an entry >= maxlen+alignment-1 blocks. @@ -1372,8 +1372,8 @@ xfs_alloc_ag_vextent_size( /* * Allocate and initialize a cursor for the by-block tree. */ - bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, rbno, rlen, XFSA_FIXUP_CNT_OK))) goto error0; @@ -1515,8 +1515,7 @@ xfs_free_ag_extent( /* * Allocate and initialize a cursor for the by-block btree. */ - bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL, - 0); + bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); cnt_cur = NULL; /* * Look for a neighboring block on the left (lower block numbers) @@ -1575,8 +1574,7 @@ xfs_free_ag_extent( /* * Now allocate and initialize a cursor for the by-size tree. */ - cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL, - 0); + cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); /* * Have both left and right contiguous neighbors. * Merge all three into a single free block. diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 3ce2645508a..60c121f1e81 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2209,3 +2209,48 @@ xfs_alloc_update( } return 0; } + +STATIC struct xfs_btree_cur * +xfs_allocbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); +} + +static const struct xfs_btree_ops xfs_allocbt_ops = { + .dup_cursor = xfs_allocbt_dup_cursor, +}; + +/* + * Allocate a new allocation btree cursor. + */ +struct xfs_btree_cur * /* new alloc btree cursor */ +xfs_allocbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agf structure */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* btree identifier */ +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_btree_cur *cur; + + ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]); + cur->bc_btnum = btnum; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_allocbt_ops; + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + + return cur; +} diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 5bd1a2c8bd0..60735384a4c 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -152,4 +152,9 @@ extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len); + +extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, + struct xfs_trans *, struct xfs_buf *, + xfs_agnumber_t, xfs_btnum_t); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index a1aab9275d5..a84d0c30b48 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -422,8 +422,7 @@ xfs_bmap_add_attrfork_btree( if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) *flags |= XFS_ILOG_DBROOT; else { - cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, - XFS_DATA_FORK); + cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); cur->bc_private.b.flist = flist; cur->bc_private.b.firstblock = *firstblock; if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) @@ -3441,8 +3440,7 @@ xfs_bmap_extents_to_btree( * Need a cursor. Can't allocate until bb_level is filled in. */ mp = ip->i_mount; - cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, - whichfork); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; @@ -5029,8 +5027,7 @@ xfs_bmapi( if (abno == NULLFSBLOCK) break; if ((ifp->if_flags & XFS_IFBROOT) && !cur) { - cur = xfs_btree_init_cursor(mp, - tp, NULL, 0, XFS_BTNUM_BMAP, + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; @@ -5147,9 +5144,8 @@ xfs_bmapi( */ ASSERT(mval->br_blockcount <= len); if ((ifp->if_flags & XFS_IFBROOT) && !cur) { - cur = xfs_btree_init_cursor(mp, - tp, NULL, 0, XFS_BTNUM_BMAP, - ip, whichfork); + cur = xfs_bmbt_init_cursor(mp, + tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; @@ -5440,8 +5436,7 @@ xfs_bunmapi( logflags = 0; if (ifp->if_flags & XFS_IFBROOT) { ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); - cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, - whichfork); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 23efad29a5c..cfbdd00045c 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2608,3 +2608,62 @@ xfs_check_nostate_extents( } return 0; } + + +STATIC struct xfs_btree_cur * +xfs_bmbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + struct xfs_btree_cur *new; + + new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.b.ip, cur->bc_private.b.whichfork); + + /* + * Copy the firstblock, flist, and flags values, + * since init cursor doesn't get them. + */ + new->bc_private.b.firstblock = cur->bc_private.b.firstblock; + new->bc_private.b.flist = cur->bc_private.b.flist; + new->bc_private.b.flags = cur->bc_private.b.flags; + + return new; +} + +static const struct xfs_btree_ops xfs_bmbt_ops = { + .dup_cursor = xfs_bmbt_dup_cursor, +}; + +/* + * Allocate a new bmap btree cursor. + */ +struct xfs_btree_cur * /* new bmap btree cursor */ +xfs_bmbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* inode owning the btree */ + int whichfork) /* data or attr fork */ +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur; + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; + cur->bc_btnum = XFS_BTNUM_BMAP; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_bmbt_ops; + + cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); + cur->bc_private.b.ip = ip; + cur->bc_private.b.firstblock = NULLFSBLOCK; + cur->bc_private.b.flist = NULL; + cur->bc_private.b.allocated = 0; + cur->bc_private.b.flags = 0; + cur->bc_private.b.whichfork = whichfork; + + return cur; +} diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index cd0d4b4bb81..4f12fff5497 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -24,6 +24,7 @@ struct xfs_btree_cur; struct xfs_btree_lblock; struct xfs_mount; struct xfs_inode; +struct xfs_trans; /* * Bmap root header, on-disk form only. @@ -300,6 +301,9 @@ extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); +extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, + struct xfs_trans *, struct xfs_inode *, int); + #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 31002093bfb..074f7f6aa27 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -387,16 +387,17 @@ xfs_btree_dup_cursor( tp = cur->bc_tp; mp = cur->bc_mp; + /* * Allocate a new cursor like the old one. */ - new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp, - cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip, - cur->bc_private.b.whichfork); + new = cur->bc_ops->dup_cursor(cur); + /* * Copy the record currently in the cursor. */ new->bc_rec = cur->bc_rec; + /* * For each level current, re-get the buffer and copy the ptr value. */ @@ -416,15 +417,6 @@ xfs_btree_dup_cursor( } else new->bc_bufs[i] = NULL; } - /* - * For bmap btrees, copy the firstblock, flist, and flags values, - * since init cursor doesn't get them. - */ - if (new->bc_btnum == XFS_BTNUM_BMAP) { - new->bc_private.b.firstblock = cur->bc_private.b.firstblock; - new->bc_private.b.flist = cur->bc_private.b.flist; - new->bc_private.b.flags = cur->bc_private.b.flags; - } *ncur = new; return 0; } @@ -504,97 +496,6 @@ xfs_btree_get_bufs( return bp; } -/* - * Allocate a new btree cursor. - * The cursor is either for allocation (A) or bmap (B) or inodes (I). - */ -xfs_btree_cur_t * /* new btree cursor */ -xfs_btree_init_cursor( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* (A only) buffer for agf structure */ - /* (I only) buffer for agi structure */ - xfs_agnumber_t agno, /* (AI only) allocation group number */ - xfs_btnum_t btnum, /* btree identifier */ - xfs_inode_t *ip, /* (B only) inode owning the btree */ - int whichfork) /* (B only) data or attr fork */ -{ - xfs_agf_t *agf; /* (A) allocation group freespace */ - xfs_agi_t *agi; /* (I) allocation group inodespace */ - xfs_btree_cur_t *cur; /* return value */ - xfs_ifork_t *ifp; /* (I) inode fork pointer */ - int nlevels=0; /* number of levels in the btree */ - - ASSERT(xfs_btree_cur_zone != NULL); - /* - * Allocate a new cursor. - */ - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); - /* - * Deduce the number of btree levels from the arguments. - */ - switch (btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - agf = XFS_BUF_TO_AGF(agbp); - nlevels = be32_to_cpu(agf->agf_levels[btnum]); - break; - case XFS_BTNUM_BMAP: - ifp = XFS_IFORK_PTR(ip, whichfork); - nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - break; - case XFS_BTNUM_INO: - agi = XFS_BUF_TO_AGI(agbp); - nlevels = be32_to_cpu(agi->agi_level); - break; - default: - ASSERT(0); - } - /* - * Fill in the common fields. - */ - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_nlevels = nlevels; - cur->bc_btnum = btnum; - cur->bc_blocklog = mp->m_sb.sb_blocklog; - /* - * Fill in private fields. - */ - switch (btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - /* - * Allocation btree fields. - */ - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - break; - case XFS_BTNUM_INO: - /* - * Inode allocation btree fields. - */ - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - break; - case XFS_BTNUM_BMAP: - /* - * Bmap btree fields. - */ - cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); - cur->bc_private.b.ip = ip; - cur->bc_private.b.firstblock = NULLFSBLOCK; - cur->bc_private.b.flist = NULL; - cur->bc_private.b.allocated = 0; - cur->bc_private.b.flags = 0; - cur->bc_private.b.whichfork = whichfork; - break; - default: - ASSERT(0); - } - return cur; -} - /* * Check for the cursor referring to the last block at the given level. */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 332b9f1da20..d30ee749860 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -131,6 +131,11 @@ extern const __uint32_t xfs_magics[]; #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ +struct xfs_btree_ops { + /* cursor operations */ + struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); +}; + /* * Btree cursor structure. * This collects all information needed by the btree code in one place. @@ -139,6 +144,7 @@ typedef struct xfs_btree_cur { struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ + const struct xfs_btree_ops *bc_ops; union { xfs_alloc_rec_incore_t a; xfs_bmbt_irec_t b; @@ -307,20 +313,6 @@ xfs_btree_get_bufs( xfs_agblock_t agbno, /* allocation group block number */ uint lock); /* lock flags for get_buf */ -/* - * Allocate a new btree cursor. - * The cursor is either for allocation (A) or bmap (B). - */ -xfs_btree_cur_t * /* new btree cursor */ -xfs_btree_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* (A only) buffer for agf structure */ - xfs_agnumber_t agno, /* (A only) allocation group number */ - xfs_btnum_t btnum, /* btree identifier */ - struct xfs_inode *ip, /* (B only) inode owning the btree */ - int whichfork); /* (B only) data/attr fork */ - /* * Check for the cursor referring to the last block at the given level. */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index aad8c5da38a..11bb169561c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -335,8 +335,7 @@ xfs_ialloc_ag_alloc( /* * Insert records describing the new inode chunk into the btree. */ - cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno, - XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); for (thisino = newino; thisino < newino + newlen; thisino += XFS_INODES_PER_CHUNK) { @@ -676,8 +675,7 @@ nextag: */ agno = tagno; *IO_agbp = NULL; - cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno), - XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1022,8 +1020,7 @@ xfs_difree( /* * Initialize the cursor. */ - cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); #ifdef DEBUG if (cur->bc_nlevels == 1) { int freecount = 0; @@ -1259,8 +1256,7 @@ xfs_dilocate( #endif /* DEBUG */ return error; } - cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 83502f3edef..8c0c4748a8d 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2076,3 +2076,44 @@ xfs_inobt_update( } return 0; } + +STATIC struct xfs_btree_cur * +xfs_inobt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno); +} + +static const struct xfs_btree_ops xfs_inobt_ops = { + .dup_cursor = xfs_inobt_dup_cursor, +}; + +/* + * Allocate a new inode btree cursor. + */ +struct xfs_btree_cur * /* new inode btree cursor */ +xfs_inobt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agi structure */ + xfs_agnumber_t agno) /* allocation group number */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_btree_cur *cur; + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + cur->bc_btnum = XFS_BTNUM_INO; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_inobt_ops; + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + + return cur; +} diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 8efc4a5b8b9..eea409349eb 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -175,4 +175,8 @@ extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino, __int32_t fcnt, xfs_inofree_t free); + +extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, + struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 4f4c9394106..a5f02f0e4c2 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -416,8 +416,7 @@ xfs_bulkstat( /* * Allocate and initialize a btree cursor for ialloc btree. */ - cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); irbp = irbuf; irbufend = irbuf + nirbuf; end_of_ag = 0; @@ -842,8 +841,7 @@ xfs_inumbers( agino = 0; continue; } - cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); -- cgit v1.2.3 From de227dd9604934d2a6d33cd332d1be431719c93e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:12 +1100 Subject: [XFS] add generic btree types Add generic union types for btree pointers, keys and records. The generic btree pointer contains either a 32 and 64bit big endian scalar for short and long form btrees, and the key and record contain the relevant type for each possible btree. Split out from a bigger patch from Dave Chinner and simplified a little further. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32178a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_btree.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index d30ee749860..428e81f0e27 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -79,6 +79,31 @@ typedef struct xfs_btree_block { } bb_u; /* rest */ } xfs_btree_block_t; +/* + * Generic key, ptr and record wrapper structures. + * + * These are disk format structures, and are converted where necessary + * by the btree specific code that needs to interpret them. + */ +union xfs_btree_ptr { + __be32 s; /* short form ptr */ + __be64 l; /* long form ptr */ +}; + +union xfs_btree_key { + xfs_bmbt_key_t bmbt; + xfs_bmdr_key_t bmbr; /* bmbt root block */ + xfs_alloc_key_t alloc; + xfs_inobt_key_t inobt; +}; + +union xfs_btree_rec { + xfs_bmbt_rec_t bmbt; + xfs_bmdr_rec_t bmbr; /* bmbt root block */ + xfs_alloc_rec_t alloc; + xfs_inobt_rec_t inobt; +}; + /* * For logging record fields. */ -- cgit v1.2.3 From 8186e517fab1854554c48955cdbcbb6710e7baef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:22 +1100 Subject: [XFS] make btree root in inode support generic The bmap btree is rooted in the inode and not in a disk block. Make the support for this feature more generic by adding a btree flag to for this feature instead of relying on the XFS_BTNUM_BMAP btnum check. Also clean up xfs_btree_get_block where this new flag is used. Based upon a patch from Dave Chinner. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32180a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_bmap_btree.c | 1 + fs/xfs/xfs_btree.c | 47 +++++++++++++++++++++++++++-------------------- fs/xfs/xfs_btree.h | 5 +++++ 3 files changed, 33 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index cfbdd00045c..d9bbed676e0 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2656,6 +2656,7 @@ xfs_bmbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_bmbt_ops; + cur->bc_flags = XFS_BTREE_ROOT_IN_INODE; cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_private.b.ip = ip; diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 074f7f6aa27..57e858fbf68 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -421,33 +421,40 @@ xfs_btree_dup_cursor( return 0; } +/* + * Get a the root block which is stored in the inode. + * + * For now this btree implementation assumes the btree root is always + * stored in the if_broot field of an inode fork. + */ +STATIC struct xfs_btree_block * +xfs_btree_get_iroot( + struct xfs_btree_cur *cur) +{ + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); + return (struct xfs_btree_block *)ifp->if_broot; +} + /* * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. + * This may be an inode btree root or from a buffer. */ -STATIC xfs_btree_block_t * /* generic btree block pointer */ +STATIC struct xfs_btree_block * /* generic btree block pointer */ xfs_btree_get_block( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level, /* level in btree */ - xfs_buf_t **bpp) /* buffer containing the block */ + struct xfs_buf **bpp) /* buffer containing the block */ { - xfs_btree_block_t *block; /* return value */ - xfs_buf_t *bp; /* return buffer */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int whichfork; /* data or attr fork */ - - if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) { - whichfork = cur->bc_private.b.whichfork; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork); - block = (xfs_btree_block_t *)ifp->if_broot; - bp = NULL; - } else { - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_BLOCK(bp); + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) { + *bpp = NULL; + return xfs_btree_get_iroot(cur); } - ASSERT(block != NULL); - *bpp = bp; - return block; + + *bpp = cur->bc_bufs[level]; + return XFS_BUF_TO_BLOCK(*bpp); } /* diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 428e81f0e27..fefbc69e500 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -170,6 +170,7 @@ typedef struct xfs_btree_cur struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ const struct xfs_btree_ops *bc_ops; + uint bc_flags; /* btree features - below */ union { xfs_alloc_rec_incore_t a; xfs_bmbt_irec_t b; @@ -201,6 +202,10 @@ typedef struct xfs_btree_cur } bc_private; /* per-btree type data */ } xfs_btree_cur_t; +/* cursor flags */ +#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ + + #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 -- cgit v1.2.3 From e99ab90d6a9e8ac92f05d2c31d44aa7feee15394 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:33 +1100 Subject: [XFS] add a long pointers flag to xfs_btree_cur Add a flag to the xfs btree cursor when using long (64bit) block pointers instead of checking btnum == XFS_BTNUM_BMAP. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32181a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_bmap_btree.c | 2 +- fs/xfs/xfs_btree.c | 6 +++--- fs/xfs/xfs_btree.h | 6 +----- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index d9bbed676e0..1ec494e111b 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2656,7 +2656,7 @@ xfs_bmbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_bmbt_ops; - cur->bc_flags = XFS_BTREE_ROOT_IN_INODE; + cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_private.b.ip = ip; diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 57e858fbf68..59796b42e9c 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -90,7 +90,7 @@ xfs_btree_check_block( int level, /* level of the btree block */ xfs_buf_t *bp) /* buffer containing block, if any */ { - if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level, bp); else @@ -516,7 +516,7 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); - if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; else return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; @@ -808,7 +808,7 @@ xfs_btree_setbuf( if (!bp) return; b = XFS_BUF_TO_BLOCK(bp); - if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) { + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index fefbc69e500..dd93fd39c56 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -115,11 +115,6 @@ union xfs_btree_rec { #define XFS_BB_NUM_BITS 5 #define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) -/* - * Boolean to select which form of xfs_btree_block_t.bb_u to use. - */ -#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP) - /* * Magic numbers for btree blocks. */ @@ -203,6 +198,7 @@ typedef struct xfs_btree_cur } xfs_btree_cur_t; /* cursor flags */ +#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ #define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ -- cgit v1.2.3 From b524bfeee2152fa64b6210f28ced80489b9d2439 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:43 +1100 Subject: [XFS] refactor xfs_btree_readahead From: Dave Chinner Refactor xfs_btree_readahead to make it more readable: (a) remove the inline xfs_btree_readahead wrapper and move all checks out of line into the main routine. (b) factor out helpers for short/long form btrees (c) move check for root in inodes from the callers into xfs_btree_readahead [hch: split out from a big patch and minor cleanups] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32182a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_bmap_btree.c | 15 +++--- fs/xfs/xfs_btree.c | 118 ++++++++++++++++++++++++++++-------------------- fs/xfs/xfs_btree.h | 15 +----- 3 files changed, 76 insertions(+), 72 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 1ec494e111b..519249e2053 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -1721,8 +1721,9 @@ xfs_bmbt_decrement( XFS_BMBT_TRACE_CURSOR(cur, ENTRY); XFS_BMBT_TRACE_ARGI(cur, level); ASSERT(level < cur->bc_nlevels); - if (level < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + + xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + if (--cur->bc_ptrs[level] > 0) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); *stat = 1; @@ -1743,8 +1744,7 @@ xfs_bmbt_decrement( for (lev = level + 1; lev < cur->bc_nlevels; lev++) { if (--cur->bc_ptrs[lev] > 0) break; - if (lev < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); + xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); } if (lev == cur->bc_nlevels) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); @@ -1995,8 +1995,8 @@ xfs_bmbt_increment( XFS_BMBT_TRACE_CURSOR(cur, ENTRY); XFS_BMBT_TRACE_ARGI(cur, level); ASSERT(level < cur->bc_nlevels); - if (level < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); block = xfs_bmbt_get_block(cur, level, &bp); #ifdef DEBUG if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { @@ -2024,8 +2024,7 @@ xfs_bmbt_increment( #endif if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) break; - if (lev < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); + xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); } if (lev == cur->bc_nlevels) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 59796b42e9c..4d793e4ccdc 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -725,66 +725,84 @@ xfs_btree_reada_bufs( xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); } +STATIC int +xfs_btree_readahead_lblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) +{ + int rval = 0; + xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, left, 1); + rval++; + } + + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, right, 1); + rval++; + } + + return rval; +} + +STATIC int +xfs_btree_readahead_sblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) +{ + int rval = 0; + xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); + xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); + + + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + left, 1); + rval++; + } + + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + right, 1); + rval++; + } + + return rval; +} + /* * Read-ahead btree blocks, at the given level. * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. */ int -xfs_btree_readahead_core( - xfs_btree_cur_t *cur, /* btree cursor */ +xfs_btree_readahead( + struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ int lr) /* left/right bits */ { - xfs_alloc_block_t *a; - xfs_bmbt_block_t *b; - xfs_inobt_block_t *i; - int rval = 0; + struct xfs_btree_block *block; + + /* + * No readahead needed if we are at the root level and the + * btree root is stored in the inode. + */ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (lev == cur->bc_nlevels - 1)) + return 0; + + if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) + return 0; - ASSERT(cur->bc_bufs[lev] != NULL); cur->bc_ra[lev] |= lr; - switch (cur->bc_btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); - if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(a->bb_leftsib), 1); - rval++; - } - if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(a->bb_rightsib), 1); - rval++; - } - break; - case XFS_BTNUM_BMAP: - b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]); - if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1); - rval++; - } - if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1); - rval++; - } - break; - case XFS_BTNUM_INO: - i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); - if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(i->bb_leftsib), 1); - rval++; - } - if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(i->bb_rightsib), 1); - rval++; - } - break; - default: - ASSERT(0); - } - return rval; + block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return xfs_btree_readahead_lblock(cur, lr, block); + return xfs_btree_readahead_sblock(cur, lr, block); } /* diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index dd93fd39c56..8be838f0154 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -421,23 +421,10 @@ xfs_btree_reada_bufs( * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. */ int /* readahead block count */ -xfs_btree_readahead_core( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - int lr); /* left/right bits */ - -static inline int /* readahead block count */ xfs_btree_readahead( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ - int lr) /* left/right bits */ -{ - if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) - return 0; - - return xfs_btree_readahead_core(cur, lev, lr); -} - + int lr); /* left/right bits */ /* * Set the buffer for level "lev" in the cursor to bp, releasing -- cgit v1.2.3 From a23f6ef8ce966abc0f6e24a81ceb6a74ed30693b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:53 +1100 Subject: [XFS] refactor btree validation helpers Move the various btree validation helpers around in xfs_btree.c so that they are close to each other and in common #ifdef DEBUG sections. Also add a new xfs_btree_check_ptr helper to check a btree ptr that can be either long or short form. Split out from a bigger patch from Dave Chinner with various small changes applied by me. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32183a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_btree.c | 196 +++++++++++++++++++++++++++-------------------------- fs/xfs/xfs_btree.h | 97 ++++++++++++++------------ 2 files changed, 155 insertions(+), 138 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 4d793e4ccdc..966d58d50fa 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -80,24 +80,6 @@ xfs_btree_maxrecs( */ #ifdef DEBUG -/* - * Debug routine: check that block header is ok. - */ -void -xfs_btree_check_block( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block, /* generic btree block pointer */ - int level, /* level of the btree block */ - xfs_buf_t *bp) /* buffer containing block, if any */ -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level, - bp); - else - xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level, - bp); -} - /* * Debug routine: check that keys are in the right order. */ @@ -150,65 +132,7 @@ xfs_btree_check_key( ASSERT(0); } } -#endif /* DEBUG */ - -/* - * Checking routine: check that long form block header is ok. - */ -/* ARGSUSED */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_lblock_t *block, /* btree long form block pointer */ - int level, /* level of the btree block */ - xfs_buf_t *bp) /* buffer for block, if any */ -{ - int lblock_ok; /* block passes checks */ - xfs_mount_t *mp; /* file system mount point */ - - mp = cur->bc_mp; - lblock_ok = - be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && - be16_to_cpu(block->bb_level) == level && - be16_to_cpu(block->bb_numrecs) <= - xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && - block->bb_leftsib && - (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && - block->bb_rightsib && - (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); - if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, - XFS_RANDOM_BTREE_CHECK_LBLOCK))) { - if (bp) - xfs_buftrace("LBTREE ERROR", bp); - XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, - mp); - return XFS_ERROR(EFSCORRUPTED); - } - return 0; -} -/* - * Checking routine: check that (long) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lptr( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_dfsbno_t ptr, /* btree block disk address */ - int level) /* btree block level */ -{ - xfs_mount_t *mp; /* file system mount point */ - - mp = cur->bc_mp; - XFS_WANT_CORRUPTED_RETURN( - level > 0 && - ptr != NULLDFSBNO && - XFS_FSB_SANITY_CHECK(mp, ptr)); - return 0; -} - -#ifdef DEBUG /* * Debug routine: check that records are in the right order. */ @@ -268,19 +192,49 @@ xfs_btree_check_rec( } #endif /* DEBUG */ -/* - * Checking routine: check that block header is ok. - */ -/* ARGSUSED */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_lblock *block, /* btree long form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer for block, if any */ +{ + int lblock_ok; /* block passes checks */ + struct xfs_mount *mp; /* file system mount point */ + + mp = cur->bc_mp; + lblock_ok = + be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && + be16_to_cpu(block->bb_level) == level && + be16_to_cpu(block->bb_numrecs) <= + xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + block->bb_leftsib && + (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && + block->bb_rightsib && + (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); + if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, + XFS_ERRTAG_BTREE_CHECK_LBLOCK, + XFS_RANDOM_BTREE_CHECK_LBLOCK))) { + if (bp) + xfs_buftrace("LBTREE ERROR", bp); + XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, + mp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} + int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_sblock_t *block, /* btree short form block pointer */ + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_sblock *block, /* btree short form block pointer */ int level, /* level of the btree block */ - xfs_buf_t *bp) /* buffer containing block */ + struct xfs_buf *bp) /* buffer containing block */ { - xfs_buf_t *agbp; /* buffer for ag. freespace struct */ - xfs_agf_t *agf; /* ag. freespace structure */ + struct xfs_buf *agbp; /* buffer for ag. freespace struct */ + struct xfs_agf *agf; /* ag. freespace structure */ xfs_agblock_t agflen; /* native ag. freespace length */ int sblock_ok; /* block passes checks */ @@ -311,26 +265,78 @@ xfs_btree_check_sblock( } /* - * Checking routine: check that (short) pointer is ok. + * Debug routine: check that block header is ok. + */ +int +xfs_btree_check_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* generic btree block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer containing block, if any */ +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + return xfs_btree_check_lblock(cur, + (struct xfs_btree_lblock *)block, level, bp); + } else { + return xfs_btree_check_sblock(cur, + (struct xfs_btree_sblock *)block, level, bp); + } +} + +/* + * Check that (long) pointer is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_dfsbno_t bno, /* btree block disk address */ + int level) /* btree block level */ +{ + XFS_WANT_CORRUPTED_RETURN( + level > 0 && + bno != NULLDFSBNO && + XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); + return 0; +} + +/* + * Check that (short) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sptr( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t ptr, /* btree block disk address */ - int level) /* btree block level */ + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* btree block disk address */ + int level) /* btree block level */ { - xfs_buf_t *agbp; /* buffer for ag. freespace struct */ - xfs_agf_t *agf; /* ag. freespace structure */ + xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; - agbp = cur->bc_private.a.agbp; - agf = XFS_BUF_TO_AGF(agbp); XFS_WANT_CORRUPTED_RETURN( level > 0 && - ptr != NULLAGBLOCK && ptr != 0 && - ptr < be32_to_cpu(agf->agf_length)); + bno != NULLAGBLOCK && + bno != 0 && + bno < agblocks); return 0; } +/* + * Check that block ptr is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_ptr( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_ptr *ptr, /* btree block disk address */ + int index, /* offset from ptr to check */ + int level) /* btree block level */ +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + return xfs_btree_check_lptr(cur, + be64_to_cpu((&ptr->l)[index]), level); + } else { + return xfs_btree_check_sptr(cur, + be32_to_cpu((&ptr->s)[index]), level); + } +} + /* * Delete the btree cursor. */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 8be838f0154..a57918276d9 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -215,81 +215,92 @@ typedef struct xfs_btree_cur #ifdef __KERNEL__ -#ifdef DEBUG /* - * Debug routine: check that block header is ok. + * Check that long form block header is ok. */ -void -xfs_btree_check_block( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block, /* generic btree block pointer */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_lblock *block, /* btree long form block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp); /* buffer containing block, if any */ /* - * Debug routine: check that keys are in the right order. + * Check that short form block header is ok. */ -void -xfs_btree_check_key( - xfs_btnum_t btnum, /* btree identifier */ - void *ak1, /* pointer to left (lower) key */ - void *ak2); /* pointer to right (higher) key */ - -/* - * Debug routine: check that records are in the right order. - */ -void -xfs_btree_check_rec( - xfs_btnum_t btnum, /* btree identifier */ - void *ar1, /* pointer to left (lower) record */ - void *ar2); /* pointer to right (higher) record */ -#else -#define xfs_btree_check_block(a,b,c,d) -#define xfs_btree_check_key(a,b,c) -#define xfs_btree_check_rec(a,b,c) -#endif /* DEBUG */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_sblock *block, /* btree short form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp); /* buffer containing block */ /* - * Checking routine: check that long form block header is ok. + * Check that block header is ok. */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_lblock_t *block, /* btree long form block pointer */ +int +xfs_btree_check_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* generic btree block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp); /* buffer containing block, if any */ /* - * Checking routine: check that (long) pointer is ok. + * Check that (long) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ xfs_dfsbno_t ptr, /* btree block disk address */ int level); /* btree block level */ #define xfs_btree_check_lptr_disk(cur, ptr, level) \ xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level) + /* - * Checking routine: check that short form block header is ok. + * Check that (short) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_sblock_t *block, /* btree short form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp); /* buffer containing block */ +xfs_btree_check_sptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t ptr, /* btree block disk address */ + int level); /* btree block level */ /* - * Checking routine: check that (short) pointer is ok. + * Check that (short) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sptr( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t ptr, /* btree block disk address */ +xfs_btree_check_ptr( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_ptr *ptr, /* btree block disk address */ + int index, /* offset from ptr to check */ int level); /* btree block level */ +#ifdef DEBUG + +/* + * Debug routine: check that keys are in the right order. + */ +void +xfs_btree_check_key( + xfs_btnum_t btnum, /* btree identifier */ + void *ak1, /* pointer to left (lower) key */ + void *ak2); /* pointer to right (higher) key */ + +/* + * Debug routine: check that records are in the right order. + */ +void +xfs_btree_check_rec( + xfs_btnum_t btnum, /* btree identifier */ + void *ar1, /* pointer to left (lower) record */ + void *ar2); /* pointer to right (higher) record */ +#else +#define xfs_btree_check_key(a, b, c) +#define xfs_btree_check_rec(a, b, c) +#endif /* DEBUG */ + /* * Delete the btree cursor. */ -- cgit v1.2.3 From 854929f05831d3a290a802815ee955b96c740c61 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 16:55:03 +1100 Subject: [XFS] add new btree statistics From: Dave Chinner Introduce statistics coverage of all the btrees and cover all the btree operations, not just some. Invaluable for determining test code coverage of all the btree operations.... SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32184a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell --- fs/xfs/linux-2.6/xfs_stats.c | 4 +++ fs/xfs/linux-2.6/xfs_stats.h | 65 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 28 +++++++++++++++++++ 3 files changed, 97 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 3d5b67c075c..64f4ec90b8b 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -53,6 +53,10 @@ xfs_read_xfsstats( { "icluster", XFSSTAT_END_INODE_CLUSTER }, { "vnodes", XFSSTAT_END_VNODE_OPS }, { "buf", XFSSTAT_END_BUF }, + { "abtb2", XFSSTAT_END_ABTB_V2 }, + { "abtc2", XFSSTAT_END_ABTC_V2 }, + { "bmbt2", XFSSTAT_END_BMBT_V2 }, + { "ibt2", XFSSTAT_END_IBT_V2 }, }; /* Loop over all stats groups */ diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index e83820febc9..736854b1ca1 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h @@ -118,6 +118,71 @@ struct xfsstats { __uint32_t xb_page_retries; __uint32_t xb_page_found; __uint32_t xb_get_read; +/* Version 2 btree counters */ +#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) + __uint32_t xs_abtb_2_lookup; + __uint32_t xs_abtb_2_compare; + __uint32_t xs_abtb_2_insrec; + __uint32_t xs_abtb_2_delrec; + __uint32_t xs_abtb_2_newroot; + __uint32_t xs_abtb_2_killroot; + __uint32_t xs_abtb_2_increment; + __uint32_t xs_abtb_2_decrement; + __uint32_t xs_abtb_2_lshift; + __uint32_t xs_abtb_2_rshift; + __uint32_t xs_abtb_2_split; + __uint32_t xs_abtb_2_join; + __uint32_t xs_abtb_2_alloc; + __uint32_t xs_abtb_2_free; + __uint32_t xs_abtb_2_moves; +#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) + __uint32_t xs_abtc_2_lookup; + __uint32_t xs_abtc_2_compare; + __uint32_t xs_abtc_2_insrec; + __uint32_t xs_abtc_2_delrec; + __uint32_t xs_abtc_2_newroot; + __uint32_t xs_abtc_2_killroot; + __uint32_t xs_abtc_2_increment; + __uint32_t xs_abtc_2_decrement; + __uint32_t xs_abtc_2_lshift; + __uint32_t xs_abtc_2_rshift; + __uint32_t xs_abtc_2_split; + __uint32_t xs_abtc_2_join; + __uint32_t xs_abtc_2_alloc; + __uint32_t xs_abtc_2_free; + __uint32_t xs_abtc_2_moves; +#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) + __uint32_t xs_bmbt_2_lookup; + __uint32_t xs_bmbt_2_compare; + __uint32_t xs_bmbt_2_insrec; + __uint32_t xs_bmbt_2_delrec; + __uint32_t xs_bmbt_2_newroot; + __uint32_t xs_bmbt_2_killroot; + __uint32_t xs_bmbt_2_increment; + __uint32_t xs_bmbt_2_decrement; + __uint32_t xs_bmbt_2_lshift; + __uint32_t xs_bmbt_2_rshift; + __uint32_t xs_bmbt_2_split; + __uint32_t xs_bmbt_2_join; + __uint32_t xs_bmbt_2_alloc; + __uint32_t xs_bmbt_2_free; + __uint32_t xs_bmbt_2_moves; +#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) + __uint32_t xs_ibt_2_lookup; + __uint32_t xs_ibt_2_compare; + __uint32_t xs_ibt_2_insrec; + __uint32_t xs_ibt_2_delrec; + __uint32_t xs_ibt_2_newroot; + __uint32_t xs_ibt_2_killroot; + __uint32_t xs_ibt_2_increment; + __uint32_t xs_ibt_2_decrement; + __uint32_t xs_ibt_2_lshift; + __uint32_t xs_ibt_2_rshift; + __uint32_t xs_ibt_2_split; + __uint32_t xs_ibt_2_join; + __uint32_t xs_ibt_2_alloc; + __uint32_t xs_ibt_2_free; + __uint32_t xs_ibt_2_moves; /* Extra precision counters */ __uint64_t xs_xstrat_bytes; __uint64_t xs_write_bytes; diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index a57918276d9..57d3bd37526 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -120,6 +120,34 @@ union xfs_btree_rec { */ extern const __uint32_t xfs_magics[]; +/* + * Generic stats interface + */ +#define __XFS_BTREE_STATS_INC(type, stat) \ + XFS_STATS_INC(xs_ ## type ## _2_ ## stat) +#define XFS_BTREE_STATS_INC(cur, stat) \ +do { \ + switch (cur->bc_btnum) { \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + } \ +} while (0) + +#define __XFS_BTREE_STATS_ADD(type, stat, val) \ + XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) +#define XFS_BTREE_STATS_ADD(cur, stat, val) \ +do { \ + switch (cur->bc_btnum) { \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + } \ +} while (0) /* * Maximum and minimum records in a btree block. * Given block size, type prefix, and leaf flag (0 or 1). -- cgit v1.2.3 From 8c4ed633e65d0bd0a25d45aad9b4646e3236cad7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:13 +1100 Subject: [XFS] make btree tracing generic Make the existing bmap btree tracing generic so that it applies to all btree types. Some fragments lifted from a patch by Dave Chinner. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32187a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/Makefile | 3 +- fs/xfs/linux-2.6/xfs_super.c | 24 ++- fs/xfs/xfs.h | 2 +- fs/xfs/xfs_alloc_btree.c | 73 +++++++++ fs/xfs/xfs_bmap_btree.c | 356 ++++++++++++++----------------------------- fs/xfs/xfs_bmap_btree.h | 18 --- fs/xfs/xfs_btree.h | 19 +++ fs/xfs/xfs_ialloc_btree.c | 73 +++++++++ fs/xfs/xfs_inode.c | 5 +- fs/xfs/xfs_inode.h | 2 +- 10 files changed, 309 insertions(+), 266 deletions(-) (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 737c9a42536..75b2be72c39 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -91,7 +91,8 @@ xfs-y += xfs_alloc.o \ xfs_dmops.o \ xfs_qmops.o -xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o +xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ + xfs_dir2_trace.o # Objects in linux/ xfs-y += $(addprefix $(XFS_LINUX)/, \ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9bfb26066a8..0c71c215853 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -36,6 +36,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -1916,10 +1917,19 @@ xfs_alloc_trace_bufs(void) if (!xfs_bmap_trace_buf) goto out_free_alloc_trace; #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE + xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE, + KM_MAYFAIL); + if (!xfs_allocbt_trace_buf) + goto out_free_bmap_trace; + + xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_inobt_trace_buf) + goto out_free_allocbt_trace; + xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); if (!xfs_bmbt_trace_buf) - goto out_free_bmap_trace; + goto out_free_inobt_trace; #endif #ifdef XFS_ATTR_TRACE xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); @@ -1941,8 +1951,12 @@ xfs_alloc_trace_bufs(void) ktrace_free(xfs_attr_trace_buf); out_free_bmbt_trace: #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(xfs_bmbt_trace_buf); + out_free_inobt_trace: + ktrace_free(xfs_inobt_trace_buf); + out_free_allocbt_trace: + ktrace_free(xfs_allocbt_trace_buf); out_free_bmap_trace: #endif #ifdef XFS_BMAP_TRACE @@ -1965,8 +1979,10 @@ xfs_free_trace_bufs(void) #ifdef XFS_ATTR_TRACE ktrace_free(xfs_attr_trace_buf); #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(xfs_bmbt_trace_buf); + ktrace_free(xfs_inobt_trace_buf); + ktrace_free(xfs_allocbt_trace_buf); #endif #ifdef XFS_BMAP_TRACE ktrace_free(xfs_bmap_trace_buf); diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 540e4c98982..17254b529c5 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -30,7 +30,7 @@ #define XFS_ATTR_TRACE 1 #define XFS_BLI_TRACE 1 #define XFS_BMAP_TRACE 1 -#define XFS_BMBT_TRACE 1 +#define XFS_BTREE_TRACE 1 #define XFS_DIR2_TRACE 1 #define XFS_DQUOT_TRACE 1 #define XFS_ILOCK_TRACE 1 diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 60c121f1e81..9c91dfcb6f2 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2219,8 +2219,81 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_allocbt_trace_buf; + +STATIC void +xfs_allocbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_allocbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.a.ar_startblock; + *l1 = cur->bc_rec.a.ar_blockcount; +} + +STATIC void +xfs_allocbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->alloc.ar_startblock); + *l1 = be32_to_cpu(key->alloc.ar_blockcount); +} + +STATIC void +xfs_allocbt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = be32_to_cpu(rec->alloc.ar_startblock); + *l1 = be32_to_cpu(rec->alloc.ar_blockcount); + *l2 = 0; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_allocbt_trace_enter, + .trace_cursor = xfs_allocbt_trace_cursor, + .trace_key = xfs_allocbt_trace_key, + .trace_record = xfs_allocbt_trace_record, +#endif }; /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 519249e2053..16f2fde6433 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -37,16 +37,13 @@ #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_quota.h" -#if defined(XFS_BMBT_TRACE) -ktrace_t *xfs_bmbt_trace_buf; -#endif - /* * Prototypes for internal btree functions. */ @@ -61,245 +58,33 @@ STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int); - -#if defined(XFS_BMBT_TRACE) - -static char ARGS[] = "args"; -static char ENTRY[] = "entry"; -static char ERROR[] = "error"; #undef EXIT -static char EXIT[] = "exit"; -/* - * Add a trace buffer entry for the arguments given to the routine, - * generic form. - */ -STATIC void -xfs_bmbt_trace_enter( - const char *func, - xfs_btree_cur_t *cur, - char *s, - int type, - int line, - __psunsigned_t a0, - __psunsigned_t a1, - __psunsigned_t a2, - __psunsigned_t a3, - __psunsigned_t a4, - __psunsigned_t a5, - __psunsigned_t a6, - __psunsigned_t a7, - __psunsigned_t a8, - __psunsigned_t a9, - __psunsigned_t a10) -{ - xfs_inode_t *ip; - int whichfork; +#define ENTRY XBT_ENTRY +#define ERROR XBT_ERROR +#define EXIT XBT_EXIT - ip = cur->bc_private.b.ip; - whichfork = cur->bc_private.b.whichfork; - ktrace_enter(xfs_bmbt_trace_buf, - (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), - (void *)func, (void *)s, (void *)ip, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); - ASSERT(ip->i_btrace); - ktrace_enter(ip->i_btrace, - (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), - (void *)func, (void *)s, (void *)ip, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); -} /* - * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. + * Keep the XFS_BMBT_TRACE_ names around for now until all code using them + * is converted to be generic and thus switches to the XFS_BTREE_TRACE_ names. */ -STATIC void -xfs_bmbt_trace_argbi( - const char *func, - xfs_btree_cur_t *cur, - xfs_buf_t *b, - int i, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line, - (__psunsigned_t)b, i, 0, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for a buffer & 2 integer args. - */ -STATIC void -xfs_bmbt_trace_argbii( - const char *func, - xfs_btree_cur_t *cur, - xfs_buf_t *b, - int i0, - int i1, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line, - (__psunsigned_t)b, i0, i1, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for 3 block-length args - * and an integer arg. - */ -STATIC void -xfs_bmbt_trace_argfffi( - const char *func, - xfs_btree_cur_t *cur, - xfs_dfiloff_t o, - xfs_dfsbno_t b, - xfs_dfilblks_t i, - int j, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line, - o >> 32, (int)o, b >> 32, (int)b, - i >> 32, (int)i, (int)j, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for one integer arg. - */ -STATIC void -xfs_bmbt_trace_argi( - const char *func, - xfs_btree_cur_t *cur, - int i, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line, - i, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, fsblock, key. - */ -STATIC void -xfs_bmbt_trace_argifk( - const char *func, - xfs_btree_cur_t *cur, - int i, - xfs_fsblock_t f, - xfs_dfiloff_t o, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, - i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32, - (int)o, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, fsblock, rec. - */ -STATIC void -xfs_bmbt_trace_argifr( - const char *func, - xfs_btree_cur_t *cur, - int i, - xfs_fsblock_t f, - xfs_bmbt_rec_t *r, - int line) -{ - xfs_dfsbno_t b; - xfs_dfilblks_t c; - xfs_dfsbno_t d; - xfs_dfiloff_t o; - xfs_bmbt_irec_t s; - - d = (xfs_dfsbno_t)f; - xfs_bmbt_disk_get_all(r, &s); - o = (xfs_dfiloff_t)s.br_startoff; - b = (xfs_dfsbno_t)s.br_startblock; - c = s.br_blockcount; - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line, - i, d >> 32, (int)d, o >> 32, - (int)o, b >> 32, (int)b, c >> 32, - (int)c, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, key. - */ -STATIC void -xfs_bmbt_trace_argik( - const char *func, - xfs_btree_cur_t *cur, - int i, - xfs_bmbt_key_t *k, - int line) -{ - xfs_dfiloff_t o; - - o = be64_to_cpu(k->br_startoff); - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, - i, o >> 32, (int)o, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for the cursor/operation. - */ -STATIC void -xfs_bmbt_trace_cursor( - const char *func, - xfs_btree_cur_t *cur, - char *s, - int line) -{ - xfs_bmbt_rec_host_t r; - - xfs_bmbt_set_all(&r, &cur->bc_rec.b); - xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line, - (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) | - cur->bc_private.b.allocated, - r.l0 >> 32, (int)r.l0, - r.l1 >> 32, (int)r.l1, - (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1], - (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3], - (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], - (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); -} - -#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ - xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__) -#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ - xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__) -#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ - xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__) -#define XFS_BMBT_TRACE_ARGI(c,i) \ - xfs_bmbt_trace_argi(__func__, c, i, __LINE__) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ - xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__) -#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ - xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__) -#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ - xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__) -#define XFS_BMBT_TRACE_CURSOR(c,s) \ - xfs_bmbt_trace_cursor(__func__, c, s, __LINE__) -#else -#define XFS_BMBT_TRACE_ARGBI(c,b,i) -#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) -#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) -#define XFS_BMBT_TRACE_ARGI(c,i) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) -#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) -#define XFS_BMBT_TRACE_ARGIK(c,i,k) -#define XFS_BMBT_TRACE_CURSOR(c,s) -#endif /* XFS_BMBT_TRACE */ +#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ + XFS_BTREE_TRACE_ARGBI(c,b,i) +#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ + XFS_BTREE_TRACE_ARGBII(c,b,i,j) +#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ + XFS_BTREE_TRACE_ARGFFFI(c,o,b,i,j) +#define XFS_BMBT_TRACE_ARGI(c,i) \ + XFS_BTREE_TRACE_ARGI(c,i) +#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ + XFS_BTREE_TRACE_ARGIPK(c,i,(union xfs_btree_ptr)f,s) +#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ + XFS_BTREE_TRACE_ARGIPR(c,i, \ + (union xfs_btree_ptr)f, (union xfs_btree_rec *)r) +#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ + XFS_BTREE_TRACE_ARGIK(c,i,(union xfs_btree_key *)k) +#define XFS_BMBT_TRACE_CURSOR(c,s) \ + XFS_BTREE_TRACE_CURSOR(c,s) /* @@ -1485,7 +1270,8 @@ xfs_bmbt_split( xfs_bmbt_rec_t *rrp; /* right record pointer */ XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff); + // disable until merged into common code +// XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff); args.tp = cur->bc_tp; args.mp = cur->bc_mp; lbp = cur->bc_bufs[level]; @@ -2629,8 +2415,100 @@ xfs_bmbt_dup_cursor( return new; } +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_bmbt_trace_buf; + +STATIC void +xfs_bmbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + struct xfs_inode *ip = cur->bc_private.b.ip; + int whichfork = cur->bc_private.b.whichfork; + + ktrace_enter(xfs_bmbt_trace_buf, + (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), + (void *)func, (void *)s, (void *)ip, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); + ktrace_enter(ip->i_btrace, + (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), + (void *)func, (void *)s, (void *)ip, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_bmbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + struct xfs_bmbt_rec_host r; + + xfs_bmbt_set_all(&r, &cur->bc_rec.b); + + *s0 = (cur->bc_nlevels << 24) | + (cur->bc_private.b.flags << 16) | + cur->bc_private.b.allocated; + *l0 = r.l0; + *l1 = r.l1; +} + +STATIC void +xfs_bmbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be64_to_cpu(key->bmbt.br_startoff); + *l1 = 0; +} + +STATIC void +xfs_bmbt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + struct xfs_bmbt_irec irec; + + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); + *l0 = irec.br_startoff; + *l1 = irec.br_startblock; + *l2 = irec.br_blockcount; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_bmbt_trace_enter, + .trace_cursor = xfs_bmbt_trace_cursor, + .trace_key = xfs_bmbt_trace_key, + .trace_record = xfs_bmbt_trace_record, +#endif }; /* diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 4f12fff5497..5628d89cea4 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -233,24 +233,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #ifdef __KERNEL__ -#if defined(XFS_BMBT_TRACE) -/* - * Trace buffer entry types. - */ -#define XFS_BMBT_KTRACE_ARGBI 1 -#define XFS_BMBT_KTRACE_ARGBII 2 -#define XFS_BMBT_KTRACE_ARGFFFI 3 -#define XFS_BMBT_KTRACE_ARGI 4 -#define XFS_BMBT_KTRACE_ARGIFK 5 -#define XFS_BMBT_KTRACE_ARGIFR 6 -#define XFS_BMBT_KTRACE_ARGIK 7 -#define XFS_BMBT_KTRACE_CUR 8 - -#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */ -#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */ -extern ktrace_t *xfs_bmbt_trace_buf; -#endif - /* * Prototypes for xfs_bmap.c to call. */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 57d3bd37526..0647a0eff0d 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -182,6 +182,25 @@ do { \ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + + /* btree tracing */ +#ifdef XFS_BTREE_TRACE + void (*trace_enter)(struct xfs_btree_cur *, const char *, + char *, int, int, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t); + void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *, + __uint64_t *, __uint64_t *); + void (*trace_key)(struct xfs_btree_cur *, + union xfs_btree_key *, __uint64_t *, + __uint64_t *); + void (*trace_record)(struct xfs_btree_cur *, + union xfs_btree_rec *, __uint64_t *, + __uint64_t *, __uint64_t *); +#endif }; /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 8c0c4748a8d..fc99524b17a 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2085,8 +2085,81 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_inobt_trace_buf; + +STATIC void +xfs_inobt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_inobt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.i.ir_startino; + *l1 = cur->bc_rec.i.ir_free; +} + +STATIC void +xfs_inobt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->inobt.ir_startino); + *l1 = 0; +} + +STATIC void +xfs_inobt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = be32_to_cpu(rec->inobt.ir_startino); + *l1 = be32_to_cpu(rec->inobt.ir_freecount); + *l2 = be64_to_cpu(rec->inobt.ir_free); +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_inobt_trace_enter, + .trace_cursor = xfs_inobt_trace_cursor, + .trace_key = xfs_inobt_trace_key, + .trace_record = xfs_inobt_trace_record, +#endif }; /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2a158a26286..cc0474ddd2d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -41,6 +41,7 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" @@ -835,7 +836,7 @@ xfs_inode_alloc( #ifdef XFS_BMAP_TRACE ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_RW_TRACE @@ -2673,7 +2674,7 @@ xfs_idestroy( #ifdef XFS_BMAP_TRACE ktrace_free(ip->i_xtrace); #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(ip->i_btrace); #endif #ifdef XFS_RW_TRACE diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3af1f6dd149..2a69a7dee22 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -245,7 +245,7 @@ typedef struct xfs_inode { #ifdef XFS_BMAP_TRACE struct ktrace *i_xtrace; /* inode extent list trace */ #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE struct ktrace *i_btrace; /* inode bmap btree trace */ #endif #ifdef XFS_RW_TRACE -- cgit v1.2.3 From ce5e42db421a41b1ad0cfd68c6058566b963e14b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:23 +1100 Subject: [XFS] add get_maxrecs btree operation Factor xfs_btree_maxrecs into a per-btree operation. The get_maxrecs method is based on a patch from Dave Chinner. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32188a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 9 +++++++++ fs/xfs/xfs_bmap_btree.c | 9 +++++++++ fs/xfs/xfs_btree.c | 29 ++--------------------------- fs/xfs/xfs_btree.h | 3 +++ fs/xfs/xfs_ialloc_btree.c | 9 +++++++++ 5 files changed, 32 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 9c91dfcb6f2..1f268b6f436 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2219,6 +2219,14 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +STATIC int +xfs_allocbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_alloc_mxr[level != 0]; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -2287,6 +2295,7 @@ xfs_allocbt_trace_record( static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, + .get_maxrecs = xfs_allocbt_get_maxrecs, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_allocbt_trace_enter, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 16f2fde6433..bdcfbea1e06 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2415,6 +2415,14 @@ xfs_bmbt_dup_cursor( return new; } +STATIC int +xfs_bmbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return XFS_BMAP_BLOCK_IMAXRECS(level, cur); +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_bmbt_trace_buf; @@ -2502,6 +2510,7 @@ xfs_bmbt_trace_record( static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, + .get_maxrecs = xfs_bmbt_get_maxrecs, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_bmbt_trace_enter, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 966d58d50fa..893e86f2ad5 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -50,31 +50,6 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }; -/* - * Checking routine: return maxrecs for the block. - */ -STATIC int /* number of records fitting in block */ -xfs_btree_maxrecs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block) /* generic btree block pointer */ -{ - switch (cur->bc_btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - return (int)XFS_ALLOC_BLOCK_MAXRECS( - be16_to_cpu(block->bb_level), cur); - case XFS_BTNUM_BMAP: - return (int)XFS_BMAP_BLOCK_IMAXRECS( - be16_to_cpu(block->bb_level), cur); - case XFS_BTNUM_INO: - return (int)XFS_INOBT_BLOCK_MAXRECS( - be16_to_cpu(block->bb_level), cur); - default: - ASSERT(0); - return 0; - } -} - /* * External routines. */ @@ -207,7 +182,7 @@ xfs_btree_check_lblock( be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= - xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + cur->bc_ops->get_maxrecs(cur, level) && block->bb_leftsib && (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && @@ -245,7 +220,7 @@ xfs_btree_check_sblock( be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= - xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + cur->bc_ops->get_maxrecs(cur, level) && (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || be32_to_cpu(block->bb_leftsib) < agflen) && block->bb_leftsib && diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 0647a0eff0d..5398cd0d4d4 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -183,6 +183,9 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* records in block/level */ + int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); + /* btree tracing */ #ifdef XFS_BTREE_TRACE void (*trace_enter)(struct xfs_btree_cur *, const char *, diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index fc99524b17a..18867f1aaca 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2085,6 +2085,14 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +STATIC int +xfs_inobt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_inobt_mxr[level != 0]; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -2153,6 +2161,7 @@ xfs_inobt_trace_record( static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, + .get_maxrecs = xfs_inobt_get_maxrecs, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_inobt_trace_enter, -- cgit v1.2.3 From 65f1eaeac0efc968797f3ac955b85ba3f5d4f9c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:34 +1100 Subject: [XFS] add helpers for addressing entities inside a btree block Add new helpers in xfs_btree.c to find the record, key and block pointer entries inside a btree block. To implement this genericly the ->get_maxrecs methods and two new xfs_btree_ops entries for the key and record sizes are used. Also add a big comment describing how the addressing inside a btree block works. Note that these helpers are unused until users are introduced in the next patches and this patch will thus cause some harmless compiler warnings. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32189a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 3 ++ fs/xfs/xfs_bmap_btree.c | 3 ++ fs/xfs/xfs_btree.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 13 +++++ fs/xfs/xfs_ialloc_btree.c | 3 ++ 5 files changed, 152 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 1f268b6f436..9e2421c31a3 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2294,6 +2294,9 @@ xfs_allocbt_trace_record( #endif /* XFS_BTREE_TRACE */ static const struct xfs_btree_ops xfs_allocbt_ops = { + .rec_len = sizeof(xfs_alloc_rec_t), + .key_len = sizeof(xfs_alloc_key_t), + .dup_cursor = xfs_allocbt_dup_cursor, .get_maxrecs = xfs_allocbt_get_maxrecs, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bdcfbea1e06..a71010abf6e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2509,6 +2509,9 @@ xfs_bmbt_trace_record( #endif /* XFS_BTREE_TRACE */ static const struct xfs_btree_ops xfs_bmbt_ops = { + .rec_len = sizeof(xfs_bmbt_rec_t), + .key_len = sizeof(xfs_bmbt_key_t), + .dup_cursor = xfs_bmbt_dup_cursor, .get_maxrecs = xfs_bmbt_get_maxrecs, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 893e86f2ad5..4aec7c7d5ba 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -402,6 +402,136 @@ xfs_btree_dup_cursor( return 0; } +/* + * XFS btree block layout and addressing: + * + * There are two types of blocks in the btree: leaf and non-leaf blocks. + * + * The leaf record start with a header then followed by records containing + * the values. A non-leaf block also starts with the same header, and + * then first contains lookup keys followed by an equal number of pointers + * to the btree blocks at the previous level. + * + * +--------+-------+-------+-------+-------+-------+-------+ + * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | + * +--------+-------+-------+-------+-------+-------+-------+ + * + * +--------+-------+-------+-------+-------+-------+-------+ + * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | + * +--------+-------+-------+-------+-------+-------+-------+ + * + * The header is called struct xfs_btree_block for reasons better left unknown + * and comes in different versions for short (32bit) and long (64bit) block + * pointers. The record and key structures are defined by the btree instances + * and opaque to the btree core. The block pointers are simple disk endian + * integers, available in a short (32bit) and long (64bit) variant. + * + * The helpers below calculate the offset of a given record, key or pointer + * into a btree block (xfs_btree_*_offset) or return a pointer to the given + * record, key or pointer (xfs_btree_*_addr). Note that all addressing + * inside the btree block is done using indices starting at one, not zero! + */ + +/* + * Return size of the btree block header for this btree instance. + */ +static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) +{ + return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + sizeof(struct xfs_btree_lblock) : + sizeof(struct xfs_btree_sblock); +} + +/* + * Return size of btree block pointers for this btree instance. + */ +static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) +{ + return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + sizeof(__be64) : sizeof(__be32); +} + +/* + * Calculate offset of the n-th record in a btree block. + */ +STATIC size_t +xfs_btree_rec_offset( + struct xfs_btree_cur *cur, + int n) +{ + return xfs_btree_block_len(cur) + + (n - 1) * cur->bc_ops->rec_len; +} + +/* + * Calculate offset of the n-th key in a btree block. + */ +STATIC size_t +xfs_btree_key_offset( + struct xfs_btree_cur *cur, + int n) +{ + return xfs_btree_block_len(cur) + + (n - 1) * cur->bc_ops->key_len; +} + +/* + * Calculate offset of the n-th block pointer in a btree block. + */ +STATIC size_t +xfs_btree_ptr_offset( + struct xfs_btree_cur *cur, + int n, + int level) +{ + return xfs_btree_block_len(cur) + + cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + + (n - 1) * xfs_btree_ptr_len(cur); +} + +/* + * Return a pointer to the n-th record in the btree block. + */ +STATIC union xfs_btree_rec * +xfs_btree_rec_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + return (union xfs_btree_rec *) + ((char *)block + xfs_btree_rec_offset(cur, n)); +} + +/* + * Return a pointer to the n-th key in the btree block. + */ +STATIC union xfs_btree_key * +xfs_btree_key_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + return (union xfs_btree_key *) + ((char *)block + xfs_btree_key_offset(cur, n)); +} + +/* + * Return a pointer to the n-th block pointer in the btree block. + */ +STATIC union xfs_btree_ptr * +xfs_btree_ptr_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + int level = xfs_btree_get_level(block); + + ASSERT(block->bb_level != 0); + + return (union xfs_btree_ptr *) + ((char *)block + xfs_btree_ptr_offset(cur, n, level)); +} + /* * Get a the root block which is stored in the inode. * diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 5398cd0d4d4..593f82b01b6 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -180,6 +180,10 @@ do { \ #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ struct xfs_btree_ops { + /* size of the key and record structures */ + size_t key_len; + size_t rec_len; + /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); @@ -497,6 +501,15 @@ xfs_btree_setbuf( int lev, /* level in btree */ struct xfs_buf *bp); /* new buffer to set */ + +/* + * Helpers. + */ +static inline int xfs_btree_get_level(struct xfs_btree_block *block) +{ + return be16_to_cpu(block->bb_level); +} + #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 18867f1aaca..fc6db94492d 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2160,6 +2160,9 @@ xfs_inobt_trace_record( #endif /* XFS_BTREE_TRACE */ static const struct xfs_btree_ops xfs_inobt_ops = { + .rec_len = sizeof(xfs_inobt_rec_t), + .key_len = sizeof(xfs_inobt_key_t), + .dup_cursor = xfs_inobt_dup_cursor, .get_maxrecs = xfs_inobt_get_maxrecs, -- cgit v1.2.3 From 637aa50f461b8ea6b1e8bf9877b0d13d00085043 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:45 +1100 Subject: [XFS] implement generic xfs_btree_increment From: Dave Chinner Because this is the first major generic btree routine this patch includes some infrastrucure, first a few routines to deal with a btree block that can be either in short or long form, second xfs_btree_read_buf_block, which is the new central routine to read a btree block given a cursor, and third the new xfs_btree_ptr_addr routine to calculate the address for a given btree pointer record. [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32190a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 12 +-- fs/xfs/xfs_alloc_btree.c | 99 +-------------------- fs/xfs/xfs_alloc_btree.h | 6 -- fs/xfs/xfs_bmap.c | 4 +- fs/xfs/xfs_bmap_btree.c | 88 +----------------- fs/xfs/xfs_bmap_btree.h | 1 - fs/xfs/xfs_btree.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 10 +++ fs/xfs/xfs_ialloc.c | 14 +-- fs/xfs/xfs_ialloc_btree.c | 99 +-------------------- fs/xfs/xfs_ialloc_btree.h | 6 -- fs/xfs/xfs_itable.c | 6 +- 12 files changed, 262 insertions(+), 305 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 69833eb1de4..b8bb694b7da 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -818,7 +818,7 @@ xfs_alloc_ag_vextent_near( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (ltlen >= args->minlen) break; - if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) + if ((error = xfs_btree_increment(cnt_cur, 0, &i))) goto error0; } while (i); ASSERT(ltlen >= args->minlen); @@ -828,7 +828,7 @@ xfs_alloc_ag_vextent_near( i = cnt_cur->bc_ptrs[0]; for (j = 1, blen = 0, bdiff = 0; !error && j && (blen < args->maxlen || bdiff > 0); - error = xfs_alloc_increment(cnt_cur, 0, &j)) { + error = xfs_btree_increment(cnt_cur, 0, &j)) { /* * For each entry, decide if it's better than * the previous best entry. @@ -938,7 +938,7 @@ xfs_alloc_ag_vextent_near( * Increment the cursor, so we will point at the entry just right * of the leftward entry if any, or to the leftmost entry. */ - if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; if (!i) { /* @@ -977,7 +977,7 @@ xfs_alloc_ag_vextent_near( args->minlen, >bnoa, >lena); if (gtlena >= args->minlen) break; - if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; if (!i) { xfs_btree_del_cursor(bno_cur_gt, @@ -1066,7 +1066,7 @@ xfs_alloc_ag_vextent_near( /* * Fell off the right end. */ - if ((error = xfs_alloc_increment( + if ((error = xfs_btree_increment( bno_cur_gt, 0, &i))) goto error0; if (!i) { @@ -1548,7 +1548,7 @@ xfs_free_ag_extent( * Look for a neighboring block on the right (higher block numbers) * that is contiguous with this space. */ - if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) + if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) goto error0; if (haveright) { /* diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 9e2421c31a3..febc2d5ea29 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -303,7 +303,7 @@ xfs_alloc_delrec( */ i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_increment(tcur, level, &i))) + if ((error = xfs_btree_increment(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); i = xfs_btree_lastrec(tcur, level); @@ -517,7 +517,7 @@ xfs_alloc_delrec( * us, increment the cursor at that level. */ else if (level + 1 < cur->bc_nlevels && - (error = xfs_alloc_increment(cur, level + 1, &i))) + (error = xfs_btree_increment(cur, level + 1, &i))) return error; /* * Fix up the number of records in the surviving block. @@ -1134,7 +1134,7 @@ xfs_alloc_lookup( int i; cur->bc_ptrs[0] = keyno; - if ((error = xfs_alloc_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); *stat = 1; @@ -1570,7 +1570,7 @@ xfs_alloc_rshift( return error; i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_increment(tcur, level, &i)) || + if ((error = xfs_btree_increment(tcur, level, &i)) || (error = xfs_alloc_updkey(tcur, rkp, level + 1))) goto error0; xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); @@ -1942,97 +1942,6 @@ xfs_alloc_get_rec( return 0; } -/* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_alloc_increment( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_alloc_block_t *block; /* btree block */ - xfs_buf_t *bp; /* tree block buffer */ - int error; /* error return value */ - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the right at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - /* - * Get a pointer to the btree block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Increment the ptr at this level. If we're still in the block - * then we're done. - */ - if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) { - *stat = 1; - return 0; - } - /* - * If we just went off the right edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree incrementing pointers. - * Stop when we don't go off the right edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - bp = cur->bc_bufs[lev]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; -#endif - if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) - break; - /* - * Read-ahead the right block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - /* - * If we went off the root then we are seriously confused. - */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp); - lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - - agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_ALLOC_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = 1; - } - *stat = 1; - return 0; -} - /* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 60735384a4c..643cfabbf67 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -113,12 +113,6 @@ extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat); extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, xfs_extlen_t *len, int *stat); -/* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat); - /* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index a84d0c30b48..4b1ec44c80a 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1646,7 +1646,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount - new->br_blockcount, oldext))) goto done; - if ((error = xfs_bmbt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto done; if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, @@ -3253,7 +3253,7 @@ xfs_bmap_del_extent( got.br_startblock, temp, got.br_state))) goto done; - if ((error = xfs_bmbt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto done; cur->bc_rec.b = new; error = xfs_bmbt_insert(cur, &i); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index a71010abf6e..2d29a4980cf 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -254,7 +254,7 @@ xfs_bmbt_delrec( if (rbno != NULLFSBLOCK) { i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_increment(tcur, level, &i))) { + if ((error = xfs_btree_increment(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -445,7 +445,7 @@ xfs_bmbt_delrec( cur->bc_bufs[level] = lbp; cur->bc_ptrs[level] += lrecs; cur->bc_ra[level] = 0; - } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) { + } else if ((error = xfs_btree_increment(cur, level + 1, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -929,7 +929,7 @@ xfs_bmbt_lookup( if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) && be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) { cur->bc_ptrs[0] = keyno; - if ((error = xfs_bmbt_increment(cur, 0, &i))) { + if ((error = xfs_btree_increment(cur, 0, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1202,7 +1202,7 @@ xfs_bmbt_rshift( } i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_increment(tcur, level, &i))) { + if ((error = xfs_btree_increment(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(tcur, ERROR); goto error1; } @@ -1760,86 +1760,6 @@ xfs_bmbt_disk_get_startoff( XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } -/* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_bmbt_increment( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; - int error; /* error return value */ - xfs_fsblock_t fsbno; - int lev; - xfs_mount_t *mp; - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); - - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - block = xfs_bmbt_get_block(cur, level, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - block = xfs_bmbt_get_block(cur, lev, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) - break; - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - if (lev == cur->bc_nlevels) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - tp = cur->bc_tp; - mp = cur->bc_mp; - for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { - fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_BMBT_BLOCK(bp); - if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_ptrs[lev] = 1; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - /* * Insert the current record at the point referenced by cur. * diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 5628d89cea4..a45be38d9a3 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -251,7 +251,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); -extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *); extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *); extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 4aec7c7d5ba..e9ab86b7990 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -35,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_error.h" @@ -949,3 +950,224 @@ xfs_btree_setbuf( cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } + +STATIC int +xfs_btree_ptr_is_null( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return be64_to_cpu(ptr->l) == NULLFSBLOCK; + else + return be32_to_cpu(ptr->s) == NULLAGBLOCK; +} + +/* + * Get/set/init sibling pointers + */ +STATIC void +xfs_btree_get_sibling( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, + int lr) +{ + ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (lr == XFS_BB_RIGHTSIB) + ptr->l = block->bb_u.l.bb_rightsib; + else + ptr->l = block->bb_u.l.bb_leftsib; + } else { + if (lr == XFS_BB_RIGHTSIB) + ptr->s = block->bb_u.s.bb_rightsib; + else + ptr->s = block->bb_u.s.bb_leftsib; + } +} + +STATIC xfs_daddr_t +xfs_btree_ptr_to_daddr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK); + + return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); + } else { + ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); + ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); + + return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + be32_to_cpu(ptr->s)); + } +} + +STATIC void +xfs_btree_set_refs( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + switch (cur->bc_btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); + break; + case XFS_BTNUM_INO: + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); + break; + case XFS_BTNUM_BMAP: + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); + break; + default: + ASSERT(0); + } +} + +/* + * Read in the buffer at the given ptr and return the buffer and + * the block pointer within the buffer. + */ +STATIC int +xfs_btree_read_buf_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int level, + int flags, + struct xfs_btree_block **block, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_daddr_t d; + int error; + + /* need to sort out how callers deal with failures first */ + ASSERT(!(flags & XFS_BUF_TRYLOCK)); + + d = xfs_btree_ptr_to_daddr(cur, ptr); + error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, + mp->m_bsize, flags, bpp); + if (error) + return error; + + ASSERT(*bpp != NULL); + ASSERT(!XFS_BUF_GETERROR(*bpp)); + + xfs_btree_set_refs(cur, *bpp); + *block = XFS_BUF_TO_BLOCK(*bpp); + + error = xfs_btree_check_block(cur, *block, level, *bpp); + if (error) + xfs_trans_brelse(cur->bc_tp, *bpp); + return error; +} + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_btree_increment( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; + union xfs_btree_ptr ptr; + struct xfs_buf *bp; + int error; /* error return value */ + int lev; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + ASSERT(level < cur->bc_nlevels); + + /* Read-ahead to the right at this level. */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + /* Get a pointer to the btree block. */ + block = xfs_btree_get_block(cur, level, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* We're done if we remain in the block after the increment. */ + if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) + goto out1; + + /* Fail if we just went off the right edge of the tree. */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &ptr)) + goto out0; + + XFS_BTREE_STATS_INC(cur, increment); + + /* + * March up the tree incrementing pointers. + * Stop when we don't go off the right edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + block = xfs_btree_get_block(cur, lev, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, lev, bp); + if (error) + goto error0; +#endif + + if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) + break; + + /* Read-ahead the right block for the next loop. */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); + } + + /* + * If we went off the root then we are either seriously + * confused or have the tree root in an inode. + */ + if (lev == cur->bc_nlevels) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + goto out0; + ASSERT(0); + error = EFSCORRUPTED; + goto error0; + } + ASSERT(lev < cur->bc_nlevels); + + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { + union xfs_btree_ptr *ptrp; + + ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + error = xfs_btree_read_buf_block(cur, ptrp, --lev, + 0, &block, &bp); + if (error) + goto error0; + + xfs_btree_setbuf(cur, lev, bp); + cur->bc_ptrs[lev] = 1; + } +out1: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 593f82b01b6..f5a4b8ec4cd 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -502,9 +502,19 @@ xfs_btree_setbuf( struct xfs_buf *bp); /* new buffer to set */ +/* + * Common btree core entry points. + */ +int xfs_btree_increment(struct xfs_btree_cur *, int, int *); + /* * Helpers. */ +static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) +{ + return be16_to_cpu(block->bb_numrecs); +} + static inline int xfs_btree_get_level(struct xfs_btree_block *block) { return be16_to_cpu(block->bb_level); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 11bb169561c..3a8f0670e07 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -695,7 +695,7 @@ nextag: goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } while (i == 1); @@ -753,7 +753,7 @@ nextag: /* * Search right with cur, go forward 1 record. */ - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error1; doneright = !i; if (!doneright) { @@ -835,7 +835,7 @@ nextag: * further right. */ else { - if ((error = xfs_inobt_increment(cur, 0, + if ((error = xfs_btree_increment(cur, 0, &i))) goto error1; doneright = !i; @@ -890,7 +890,7 @@ nextag: XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (rec.ir_freecount > 0) break; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -924,7 +924,7 @@ nextag: goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } while (i == 1); ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || @@ -1033,7 +1033,7 @@ xfs_difree( goto error0; if (i) { freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } } while (i == 1); @@ -1138,7 +1138,7 @@ xfs_difree( goto error0; if (i) { freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } } while (i == 1); diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index fc6db94492d..41717da6369 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -253,7 +253,7 @@ xfs_inobt_delrec( */ i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_inobt_increment(tcur, level, &i))) + if ((error = xfs_btree_increment(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); i = xfs_btree_lastrec(tcur, level); @@ -463,7 +463,7 @@ xfs_inobt_delrec( * us, increment the cursor at that level. */ else if (level + 1 < cur->bc_nlevels && - (error = xfs_alloc_increment(cur, level + 1, &i))) + (error = xfs_btree_increment(cur, level + 1, &i))) return error; /* * Fix up the number of records in the surviving block. @@ -1014,7 +1014,7 @@ xfs_inobt_lookup( int i; cur->bc_ptrs[0] = keyno; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) return error; ASSERT(i == 1); *stat = 1; @@ -1443,7 +1443,7 @@ xfs_inobt_rshift( if ((error = xfs_btree_dup_cursor(cur, &tcur))) return error; xfs_btree_lastrec(tcur, level); - if ((error = xfs_inobt_increment(tcur, level, &i)) || + if ((error = xfs_btree_increment(tcur, level, &i)) || (error = xfs_inobt_updkey(tcur, rkp, level + 1))) { xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; @@ -1820,97 +1820,6 @@ xfs_inobt_get_rec( return 0; } -/* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_inobt_increment( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_inobt_block_t *block; /* btree block */ - xfs_buf_t *bp; /* buffer containing btree block */ - int error; /* error return value */ - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the right at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - /* - * Get a pointer to the btree block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Increment the ptr at this level. If we're still in the block - * then we're done. - */ - if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) { - *stat = 1; - return 0; - } - /* - * If we just went off the right edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree incrementing pointers. - * Stop when we don't go off the right edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - bp = cur->bc_bufs[lev]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; -#endif - if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) - break; - /* - * Read-ahead the right block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - /* - * If we went off the root then we are seriously confused. - */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp); - lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - - agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_INO_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_INOBT_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = 1; - } - *stat = 1; - return 0; -} - /* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index eea409349eb..07fed62bcb7 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -135,12 +135,6 @@ extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat); extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, __int32_t *fcnt, xfs_inofree_t *free, int *stat); -/* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat); - /* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index a5f02f0e4c2..42a214b8df9 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -471,7 +471,7 @@ xfs_bulkstat( * In any case, increment to the next record. */ if (!error) - error = xfs_inobt_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &tmp); } else { /* * Start of ag. Lookup the first inode chunk. @@ -538,7 +538,7 @@ xfs_bulkstat( * Set agino to after this chunk and bump the cursor. */ agino = gino + XFS_INODES_PER_CHUNK; - error = xfs_inobt_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &tmp); cond_resched(); } /* @@ -885,7 +885,7 @@ xfs_inumbers( bufidx = 0; } if (left) { - error = xfs_inobt_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &tmp); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); cur = NULL; -- cgit v1.2.3 From 8df4da4a0a642d3a016028c0d922bcb4d5a4a6d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:58 +1100 Subject: [XFS] implement generic xfs_btree_decrement From: Dave Chinner [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32191a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 12 +++--- fs/xfs/xfs_alloc_btree.c | 98 ++++------------------------------------------ fs/xfs/xfs_alloc_btree.h | 6 --- fs/xfs/xfs_bmap.c | 14 +++---- fs/xfs/xfs_bmap_btree.c | 90 ++++-------------------------------------- fs/xfs/xfs_bmap_btree.h | 1 - fs/xfs/xfs_btree.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 1 + fs/xfs/xfs_ialloc.c | 4 +- fs/xfs/xfs_ialloc_btree.c | 98 ++++------------------------------------------ fs/xfs/xfs_ialloc_btree.h | 6 --- 11 files changed, 137 insertions(+), 292 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index b8bb694b7da..7ca6903e235 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -961,7 +961,7 @@ xfs_alloc_ag_vextent_near( args->minlen, <bnoa, <lena); if (ltlena >= args->minlen) break; - if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) goto error0; if (!i) { xfs_btree_del_cursor(bno_cur_lt, @@ -1162,7 +1162,7 @@ xfs_alloc_ag_vextent_near( /* * Fell off the left end. */ - if ((error = xfs_alloc_decrement( + if ((error = xfs_btree_decrement( bno_cur_lt, 0, &i))) goto error0; if (!i) { @@ -1321,7 +1321,7 @@ xfs_alloc_ag_vextent_size( bestflen = flen; bestfbno = fbno; for (;;) { - if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) + if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) goto error0; if (i == 0) break; @@ -1416,7 +1416,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t flen; int i; - if ((error = xfs_alloc_decrement(ccur, 0, &i))) + if ((error = xfs_btree_decrement(ccur, 0, &i))) goto error0; if (i) { if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) @@ -1607,7 +1607,7 @@ xfs_free_ag_extent( /* * Move the by-block cursor back to the left neighbor. */ - if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); #ifdef DEBUG @@ -1653,7 +1653,7 @@ xfs_free_ag_extent( * Back up the by-block cursor to the left neighbor, and * update its length. */ - if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); nbno = ltbno; diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index febc2d5ea29..6b45481ad5b 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -256,7 +256,7 @@ xfs_alloc_delrec( xfs_btree_setbuf(cur, level, NULL); cur->bc_nlevels--; } else if (level > 0 && - (error = xfs_alloc_decrement(cur, level, &i))) + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -272,7 +272,7 @@ xfs_alloc_delrec( * the minimum, we're done. */ if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -336,7 +336,7 @@ xfs_alloc_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); if (level > 0 && - (error = xfs_alloc_decrement(cur, level, + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; @@ -352,7 +352,7 @@ xfs_alloc_delrec( if (lbno != NULLAGBLOCK) { i = xfs_btree_firstrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -368,7 +368,7 @@ xfs_alloc_delrec( */ i = xfs_btree_firstrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_firstrec(tcur, level); @@ -468,7 +468,7 @@ xfs_alloc_delrec( * Just return. This is probably a logic error, but it's not fatal. */ else { - if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -1779,90 +1779,6 @@ xfs_alloc_updkey( * Externally visible routines. */ -/* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_alloc_decrement( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_alloc_block_t *block; /* btree block */ - int error; /* error return value */ - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the left at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - /* - * Decrement the ptr at this level. If we're still in the block - * then we're done. - */ - if (--cur->bc_ptrs[level] > 0) { - *stat = 1; - return 0; - } - /* - * Get a pointer to the btree block. - */ - block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, - cur->bc_bufs[level]))) - return error; -#endif - /* - * If we just went off the left edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree decrementing pointers. - * Stop when we don't go off the left edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - /* - * Read-ahead the left block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - /* - * If we went off the root then we are seriously confused. - */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - xfs_buf_t *bp; /* buffer pointer for block */ - - agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_ALLOC_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs); - } - *stat = 1; - return 0; -} - /* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) @@ -1889,7 +1805,7 @@ xfs_alloc_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_alloc_decrement(cur, level, &i))) + if ((error = xfs_btree_decrement(cur, level, &i))) return error; break; } diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 643cfabbf67..b59d7fc78fe 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -94,12 +94,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) -/* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat); - /* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 4b1ec44c80a..bdbab54948f 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -820,7 +820,7 @@ xfs_bmap_add_extent_delay_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, @@ -1381,13 +1381,13 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, @@ -1430,7 +1430,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, @@ -1473,7 +1473,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, @@ -1556,7 +1556,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount - new->br_blockcount, oldext))) goto done; - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; if (xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, @@ -2108,7 +2108,7 @@ xfs_bmap_add_extent_hole_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, left.br_startoff, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 2d29a4980cf..7b5181d34a5 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -203,7 +203,7 @@ xfs_bmbt_delrec( XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -216,7 +216,7 @@ xfs_bmbt_delrec( goto error0; } if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -237,7 +237,7 @@ xfs_bmbt_delrec( XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -282,7 +282,7 @@ xfs_bmbt_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); tcur = NULL; if (level > 0) { - if ((error = xfs_bmbt_decrement(cur, + if ((error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); @@ -298,7 +298,7 @@ xfs_bmbt_delrec( if (lbno != NULLFSBLOCK) { i = xfs_btree_firstrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_decrement(tcur, level, &i))) { + if ((error = xfs_btree_decrement(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -311,7 +311,7 @@ xfs_bmbt_delrec( /* * decrement to last in block */ - if ((error = xfs_bmbt_decrement(tcur, level, &i))) { + if ((error = xfs_btree_decrement(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -383,7 +383,7 @@ xfs_bmbt_delrec( } lrecs = be16_to_cpu(left->bb_numrecs); } else { - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -1486,80 +1486,6 @@ xfs_bmdr_to_bmbt( memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } -/* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_bmbt_decrement( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; - int error; /* error return value */ - xfs_fsblock_t fsbno; - int lev; - xfs_mount_t *mp; - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); - - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - - if (--cur->bc_ptrs[level] > 0) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - block = xfs_bmbt_get_block(cur, level, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - if (lev == cur->bc_nlevels) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - tp = cur->bc_tp; - mp = cur->bc_mp; - for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { - fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_BMBT_BLOCK(bp); - if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - /* * Delete the record pointed to by cur. */ @@ -1582,7 +1508,7 @@ xfs_bmbt_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_bmbt_decrement(cur, level, + if ((error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index a45be38d9a3..1e0f1d10505 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -237,7 +237,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; * Prototypes for xfs_bmap.c to call. */ extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); -extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *); extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e9ab86b7990..3d561f8f78d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1171,3 +1171,102 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Decrement cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_btree_decrement( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; + xfs_buf_t *bp; + int error; /* error return value */ + int lev; + union xfs_btree_ptr ptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + ASSERT(level < cur->bc_nlevels); + + /* Read-ahead to the left at this level. */ + xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + + /* We're done if we remain in the block after the decrement. */ + if (--cur->bc_ptrs[level] > 0) + goto out1; + + /* Get a pointer to the btree block. */ + block = xfs_btree_get_block(cur, level, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* Fail if we just went off the left edge of the tree. */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); + if (xfs_btree_ptr_is_null(cur, &ptr)) + goto out0; + + XFS_BTREE_STATS_INC(cur, decrement); + + /* + * March up the tree decrementing pointers. + * Stop when we don't go off the left edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + if (--cur->bc_ptrs[lev] > 0) + break; + /* Read-ahead the left block for the next loop. */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); + } + + /* + * If we went off the root then we are seriously confused. + * or the root of the tree is in an inode. + */ + if (lev == cur->bc_nlevels) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + goto out0; + ASSERT(0); + error = EFSCORRUPTED; + goto error0; + } + ASSERT(lev < cur->bc_nlevels); + + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { + union xfs_btree_ptr *ptrp; + + ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + error = xfs_btree_read_buf_block(cur, ptrp, --lev, + 0, &block, &bp); + if (error) + goto error0; + xfs_btree_setbuf(cur, lev, bp); + cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); + } +out1: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index f5a4b8ec4cd..52b2da6ab32 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -506,6 +506,7 @@ xfs_btree_setbuf( * Common btree core entry points. */ int xfs_btree_increment(struct xfs_btree_cur *, int, int *); +int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3a8f0670e07..d36b42bf3ff 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -739,7 +739,7 @@ nextag: /* * Search left with tcur, back up 1 record. */ - if ((error = xfs_inobt_decrement(tcur, 0, &i))) + if ((error = xfs_btree_decrement(tcur, 0, &i))) goto error1; doneleft = !i; if (!doneleft) { @@ -815,7 +815,7 @@ nextag: * further left. */ if (useleft) { - if ((error = xfs_inobt_decrement(tcur, 0, + if ((error = xfs_btree_decrement(tcur, 0, &i))) goto error1; doneleft = !i; diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 41717da6369..9099a32f997 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -205,7 +205,7 @@ xfs_inobt_delrec( cur->bc_bufs[level] = NULL; cur->bc_nlevels--; } else if (level > 0 && - (error = xfs_inobt_decrement(cur, level, &i))) + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -222,7 +222,7 @@ xfs_inobt_delrec( */ if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { if (level > 0 && - (error = xfs_inobt_decrement(cur, level, &i))) + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -286,7 +286,7 @@ xfs_inobt_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); if (level > 0 && - (error = xfs_inobt_decrement(cur, level, + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; @@ -301,7 +301,7 @@ xfs_inobt_delrec( rrecs = be16_to_cpu(right->bb_numrecs); if (lbno != NULLAGBLOCK) { xfs_btree_firstrec(tcur, level); - if ((error = xfs_inobt_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; } } @@ -315,7 +315,7 @@ xfs_inobt_delrec( * previous block. */ xfs_btree_firstrec(tcur, level); - if ((error = xfs_inobt_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; xfs_btree_firstrec(tcur, level); /* @@ -414,7 +414,7 @@ xfs_inobt_delrec( * Just return. This is probably a logic error, but it's not fatal. */ else { - if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i))) + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -1655,90 +1655,6 @@ xfs_inobt_updkey( * Externally visible routines. */ -/* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_inobt_decrement( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_inobt_block_t *block; /* btree block */ - int error; - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the left at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - /* - * Decrement the ptr at this level. If we're still in the block - * then we're done. - */ - if (--cur->bc_ptrs[level] > 0) { - *stat = 1; - return 0; - } - /* - * Get a pointer to the btree block. - */ - block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, - cur->bc_bufs[level]))) - return error; -#endif - /* - * If we just went off the left edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree decrementing pointers. - * Stop when we don't go off the left edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - /* - * Read-ahead the left block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - /* - * If we went off the root then we are seriously confused. - */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - xfs_buf_t *bp; /* buffer containing btree block */ - - agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_INO_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_INOBT_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs); - } - *stat = 1; - return 0; -} - /* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) @@ -1765,7 +1681,7 @@ xfs_inobt_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_inobt_decrement(cur, level, &i))) + if ((error = xfs_btree_decrement(cur, level, &i))) return error; break; } diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 07fed62bcb7..84554595d28 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -116,12 +116,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \ i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) -/* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat); - /* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) -- cgit v1.2.3 From fe033cc848489851f0c7de48f0b1bab5d744ad8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:09 +1100 Subject: [XFS] implement generic xfs_btree_lookup From: Dave Chinner [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32192a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 48 +++++++ fs/xfs/xfs_alloc_btree.c | 312 +++++++--------------------------------------- fs/xfs/xfs_alloc_btree.h | 20 --- fs/xfs/xfs_bmap.c | 29 +++++ fs/xfs/xfs_bmap_btree.c | 197 +++++------------------------ fs/xfs/xfs_bmap_btree.h | 4 - fs/xfs/xfs_btree.c | 219 ++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 11 ++ fs/xfs/xfs_ialloc.c | 53 ++++++++ fs/xfs/xfs_ialloc.h | 15 +++ fs/xfs/xfs_ialloc_btree.c | 294 ++++++------------------------------------- fs/xfs/xfs_ialloc_btree.h | 20 --- 12 files changed, 487 insertions(+), 735 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 7ca6903e235..6bda0ae26c2 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -89,6 +89,54 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, * Internal functions. */ +/* + * Lookup the record equal to [bno, len] in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to [bno, len] + * in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to [bno, len] + * in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} + + /* * Compute aligned version of the found extent. * Takes alignment and min length into account. diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 6b45481ad5b..b81fbf1216e 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -937,223 +937,6 @@ xfs_alloc_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Lookup the record. The cursor is made to point to it, based on dir. - * Return 0 if can't find any such record, 1 for success. - */ -STATIC int /* error */ -xfs_alloc_lookup( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_lookup_t dir, /* <=, ==, or >= */ - int *stat) /* success/failure */ -{ - xfs_agblock_t agbno; /* a.g. relative btree block number */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_alloc_block_t *block=NULL; /* current btree block */ - int diff; /* difference for the current key */ - int error; /* error return value */ - int keyno=0; /* current key number */ - int level; /* level in the btree */ - xfs_mount_t *mp; /* file system mount point */ - - XFS_STATS_INC(xs_abt_lookup); - /* - * Get the allocation group header, and the root block number. - */ - mp = cur->bc_mp; - - { - xfs_agf_t *agf; /* a.g. freespace header */ - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - agno = be32_to_cpu(agf->agf_seqno); - agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]); - } - /* - * Iterate over each level in the btree, starting at the root. - * For each level above the leaves, find the key we need, based - * on the lookup record, then follow the corresponding block - * pointer down to the next level. - */ - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - xfs_buf_t *bp; /* buffer pointer for btree block */ - xfs_daddr_t d; /* disk address of btree block */ - - /* - * Get the disk address we're looking for. - */ - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - /* - * If the old buffer at this level is for a different block, - * throw it away, otherwise just use it. - */ - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) != d) - bp = NULL; - if (!bp) { - /* - * Need to get a new buffer. Read it, then - * set it in the cursor, releasing the old one. - */ - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno, - agbno, 0, &bp, XFS_ALLOC_BTREE_REF))) - return error; - xfs_btree_setbuf(cur, level, bp); - /* - * Point to the btree block, now that we have the buffer - */ - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, level, - bp))) - return error; - } else - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - /* - * If we already had a key match at a higher level, we know - * we need to use the first entry in this block. - */ - if (diff == 0) - keyno = 1; - /* - * Otherwise we need to search this block. Do a binary search. - */ - else { - int high; /* high entry number */ - xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */ - xfs_alloc_rec_t *krbase=NULL;/* base of records in block */ - int low; /* low entry number */ - - /* - * Get a pointer to keys or records. - */ - if (level > 0) - kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur); - else - krbase = XFS_ALLOC_REC_ADDR(block, 1, cur); - /* - * Set low and high entry numbers, 1-based. - */ - low = 1; - if (!(high = be16_to_cpu(block->bb_numrecs))) { - /* - * If the block is empty, the tree must - * be an empty leaf. - */ - ASSERT(level == 0 && cur->bc_nlevels == 1); - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - *stat = 0; - return 0; - } - /* - * Binary search the block. - */ - while (low <= high) { - xfs_extlen_t blockcount; /* key value */ - xfs_agblock_t startblock; /* key value */ - - XFS_STATS_INC(xs_abt_compare); - /* - * keyno is average of low and high. - */ - keyno = (low + high) >> 1; - /* - * Get startblock & blockcount. - */ - if (level > 0) { - xfs_alloc_key_t *kkp; - - kkp = kkbase + keyno - 1; - startblock = be32_to_cpu(kkp->ar_startblock); - blockcount = be32_to_cpu(kkp->ar_blockcount); - } else { - xfs_alloc_rec_t *krp; - - krp = krbase + keyno - 1; - startblock = be32_to_cpu(krp->ar_startblock); - blockcount = be32_to_cpu(krp->ar_blockcount); - } - /* - * Compute difference to get next direction. - */ - if (cur->bc_btnum == XFS_BTNUM_BNO) - diff = (int)startblock - - (int)cur->bc_rec.a.ar_startblock; - else if (!(diff = (int)blockcount - - (int)cur->bc_rec.a.ar_blockcount)) - diff = (int)startblock - - (int)cur->bc_rec.a.ar_startblock; - /* - * Less than, move right. - */ - if (diff < 0) - low = keyno + 1; - /* - * Greater than, move left. - */ - else if (diff > 0) - high = keyno - 1; - /* - * Equal, we're done. - */ - else - break; - } - } - /* - * If there are more levels, set up for the next level - * by getting the block number and filling in the cursor. - */ - if (level > 0) { - /* - * If we moved left, need the previous key number, - * unless there isn't one. - */ - if (diff > 0 && --keyno < 1) - keyno = 1; - agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, agbno, level))) - return error; -#endif - cur->bc_ptrs[level] = keyno; - } - } - /* - * Done with the search. - * See if we need to adjust the results. - */ - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - if (dir == XFS_LOOKUP_GE && - keyno > be16_to_cpu(block->bb_numrecs) && - be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) { - int i; - - cur->bc_ptrs[0] = keyno; - if ((error = xfs_btree_increment(cur, 0, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - *stat = 1; - return 0; - } - } - else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - /* - * Return if we succeeded or not. - */ - if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) - *stat = 0; - else - *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); - return 0; -} - /* * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. @@ -1918,53 +1701,6 @@ xfs_alloc_insert( return 0; } -/* - * Lookup the record equal to [bno, len] in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_eq( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to [bno, len] - * in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_ge( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat); -} - -/* - * Lookup the first record less than or equal to [bno, len] - * in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_le( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat); -} - /* * Update the record referred to by cur, to the value given by [bno, len]. * This either works (return 0) or gets an EFSCORRUPTED error. @@ -2052,6 +1788,51 @@ xfs_allocbt_get_maxrecs( return cur->bc_mp->m_alloc_mxr[level != 0]; } +STATIC void +xfs_allocbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(rec->alloc.ar_startblock != 0); + + key->alloc.ar_startblock = rec->alloc.ar_startblock; + key->alloc.ar_blockcount = rec->alloc.ar_blockcount; +} + +STATIC void +xfs_allocbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(agf->agf_roots[cur->bc_btnum] != 0); + + ptr->s = agf->agf_roots[cur->bc_btnum]; +} + +STATIC __int64_t +xfs_allocbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; + xfs_alloc_key_t *kp = &key->alloc; + __int64_t diff; + + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return (__int64_t)be32_to_cpu(kp->ar_startblock) - + rec->ar_startblock; + } + + diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; + if (diff) + return diff; + + return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -2124,6 +1905,9 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .get_maxrecs = xfs_allocbt_get_maxrecs, + .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, + .key_diff = xfs_allocbt_key_diff, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_allocbt_trace_enter, diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index b59d7fc78fe..aa110ff4feb 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -113,26 +113,6 @@ extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, */ extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat); -/* - * Lookup the record equal to [bno, len] in the btree given by cur. - */ -extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, int *stat); - -/* - * Lookup the first record greater than or equal to [bno, len] - * in the btree given by cur. - */ -extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, int *stat); - -/* - * Lookup the first record less than or equal to [bno, len] - * in the btree given by cur. - */ -extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, int *stat); - /* * Update the record referred to by cur, to the value given by [bno, len]. * This either works (return 0) or gets an EFSCORRUPTED error. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index bdbab54948f..1296b4102e9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -402,6 +402,35 @@ xfs_bmap_disk_count_leaves( * Bmap internal routines. */ +STATIC int /* error */ +xfs_bmbt_lookup_eq( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + int *stat) /* success/failure */ +{ + cur->bc_rec.b.br_startoff = off; + cur->bc_rec.b.br_startblock = bno; + cur->bc_rec.b.br_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +STATIC int /* error */ +xfs_bmbt_lookup_ge( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + int *stat) /* success/failure */ +{ + cur->bc_rec.b.br_startoff = off; + cur->bc_rec.b.br_startblock = bno; + cur->bc_rec.b.br_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + + /* * Called from xfs_bmap_add_attrfork to handle btree format files. */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 7b5181d34a5..8403d154ae0 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -812,146 +812,6 @@ xfs_bmbt_log_ptrs( XFS_BMBT_TRACE_CURSOR(cur, EXIT); } -/* - * Lookup the record. The cursor is made to point to it, based on dir. - */ -STATIC int /* error */ -xfs_bmbt_lookup( - xfs_btree_cur_t *cur, - xfs_lookup_t dir, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block=NULL; - xfs_buf_t *bp; - xfs_daddr_t d; - xfs_sfiloff_t diff; - int error; /* error return value */ - xfs_fsblock_t fsbno=0; - int high; - int i; - int keyno=0; - xfs_bmbt_key_t *kkbase=NULL; - xfs_bmbt_key_t *kkp; - xfs_bmbt_rec_t *krbase=NULL; - xfs_bmbt_rec_t *krp; - int level; - int low; - xfs_mount_t *mp; - xfs_bmbt_ptr_t *pp; - xfs_bmbt_irec_t *rp; - xfs_fileoff_t startoff; - xfs_trans_t *tp; - - XFS_STATS_INC(xs_bmbt_lookup); - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, (int)dir); - tp = cur->bc_tp; - mp = cur->bc_mp; - rp = &cur->bc_rec.b; - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - if (level < cur->bc_nlevels - 1) { - d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) != d) - bp = NULL; - if (!bp) { - if ((error = xfs_btree_read_bufl(mp, tp, fsbno, - 0, &bp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - xfs_btree_setbuf(cur, level, bp); - block = XFS_BUF_TO_BMBT_BLOCK(bp); - if ((error = xfs_btree_check_lblock(cur, block, - level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } else - block = XFS_BUF_TO_BMBT_BLOCK(bp); - } else - block = xfs_bmbt_get_block(cur, level, &bp); - if (diff == 0) - keyno = 1; - else { - if (level > 0) - kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur); - else - krbase = XFS_BMAP_REC_IADDR(block, 1, cur); - low = 1; - if (!(high = be16_to_cpu(block->bb_numrecs))) { - ASSERT(level == 0); - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - while (low <= high) { - XFS_STATS_INC(xs_bmbt_compare); - keyno = (low + high) >> 1; - if (level > 0) { - kkp = kkbase + keyno - 1; - startoff = be64_to_cpu(kkp->br_startoff); - } else { - krp = krbase + keyno - 1; - startoff = xfs_bmbt_disk_get_startoff(krp); - } - diff = (xfs_sfiloff_t) - (startoff - rp->br_startoff); - if (diff < 0) - low = keyno + 1; - else if (diff > 0) - high = keyno - 1; - else - break; - } - } - if (level > 0) { - if (diff > 0 && --keyno < 1) - keyno = 1; - pp = XFS_BMAP_PTR_IADDR(block, keyno, cur); - fsbno = be64_to_cpu(*pp); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, fsbno, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - cur->bc_ptrs[level] = keyno; - } - } - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) && - be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) { - cur->bc_ptrs[0] = keyno; - if ((error = xfs_btree_increment(cur, 0, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - XFS_WANT_CORRUPTED_RETURN(i == 1); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - } - else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - } else { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); - } - return 0; -} - /* * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. @@ -1809,34 +1669,6 @@ xfs_bmbt_log_recs( XFS_BMBT_TRACE_CURSOR(cur, EXIT); } -int /* error */ -xfs_bmbt_lookup_eq( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -int /* error */ -xfs_bmbt_lookup_ge( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat); -} - /* * Give the bmap btree a new root block. Copy the old broot contents * down into a real block and make the broot point to it. @@ -2269,6 +2101,32 @@ xfs_bmbt_get_maxrecs( return XFS_BMAP_BLOCK_IMAXRECS(level, cur); } +STATIC void +xfs_bmbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->bmbt.br_startoff = + cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); +} + +STATIC void +xfs_bmbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + ptr->l = 0; +} + +STATIC __int64_t +xfs_bmbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) - + cur->bc_rec.b.br_startoff; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_bmbt_trace_buf; @@ -2360,6 +2218,9 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, .get_maxrecs = xfs_bmbt_get_maxrecs, + .init_key_from_rec = xfs_bmbt_init_key_from_rec, + .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, + .key_diff = xfs_bmbt_key_diff, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_bmbt_trace_enter, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 1e0f1d10505..d04198cdc4b 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -254,10 +254,6 @@ extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *); extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); -extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t, - xfs_fsblock_t, xfs_filblks_t, int *); -extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t, - xfs_fsblock_t, xfs_filblks_t, int *); /* * Give the bmap btree a new root block. Copy the old broot contents diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 3d561f8f78d..41912a01bec 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1270,3 +1270,222 @@ error0: return error; } + +STATIC int +xfs_btree_lookup_get_block( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in the btree */ + union xfs_btree_ptr *pp, /* ptr to btree block */ + struct xfs_btree_block **blkp) /* return btree block */ +{ + struct xfs_buf *bp; /* buffer pointer for btree block */ + int error = 0; + + /* special case the root block if in an inode */ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) { + *blkp = xfs_btree_get_iroot(cur); + return 0; + } + + /* + * If the old buffer at this level for the disk address we are + * looking for re-use it. + * + * Otherwise throw it away and get a new one. + */ + bp = cur->bc_bufs[level]; + if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { + *blkp = XFS_BUF_TO_BLOCK(bp); + return 0; + } + + error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); + if (error) + return error; + + xfs_btree_setbuf(cur, level, bp); + return 0; +} + +/* + * Get current search key. For level 0 we don't actually have a key + * structure so we make one up from the record. For all other levels + * we just return the right key. + */ +STATIC union xfs_btree_key * +xfs_lookup_get_search_key( + struct xfs_btree_cur *cur, + int level, + int keyno, + struct xfs_btree_block *block, + union xfs_btree_key *kp) +{ + if (level == 0) { + cur->bc_ops->init_key_from_rec(kp, + xfs_btree_rec_addr(cur, keyno, block)); + return kp; + } + + return xfs_btree_key_addr(cur, keyno, block); +} + +/* + * Lookup the record. The cursor is made to point to it, based on dir. + * Return 0 if can't find any such record, 1 for success. + */ +int /* error */ +xfs_btree_lookup( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_lookup_t dir, /* <=, ==, or >= */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* current btree block */ + __int64_t diff; /* difference for the current key */ + int error; /* error return value */ + int keyno; /* current key number */ + int level; /* level in the btree */ + union xfs_btree_ptr *pp; /* ptr to btree block */ + union xfs_btree_ptr ptr; /* ptr to btree block */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, dir); + + XFS_BTREE_STATS_INC(cur, lookup); + + block = NULL; + keyno = 0; + + /* initialise start pointer from cursor */ + cur->bc_ops->init_ptr_from_cur(cur, &ptr); + pp = &ptr; + + /* + * Iterate over each level in the btree, starting at the root. + * For each level above the leaves, find the key we need, based + * on the lookup record, then follow the corresponding block + * pointer down to the next level. + */ + for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { + /* Get the block we need to do the lookup on. */ + error = xfs_btree_lookup_get_block(cur, level, pp, &block); + if (error) + goto error0; + + if (diff == 0) { + /* + * If we already had a key match at a higher level, we + * know we need to use the first entry in this block. + */ + keyno = 1; + } else { + /* Otherwise search this block. Do a binary search. */ + + int high; /* high entry number */ + int low; /* low entry number */ + + /* Set low and high entry numbers, 1-based. */ + low = 1; + high = xfs_btree_get_numrecs(block); + if (!high) { + /* Block is empty, must be an empty leaf. */ + ASSERT(level == 0 && cur->bc_nlevels == 1); + + cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Binary search the block. */ + while (low <= high) { + union xfs_btree_key key; + union xfs_btree_key *kp; + + XFS_BTREE_STATS_INC(cur, compare); + + /* keyno is average of low and high. */ + keyno = (low + high) >> 1; + + /* Get current search key */ + kp = xfs_lookup_get_search_key(cur, level, + keyno, block, &key); + + /* + * Compute difference to get next direction: + * - less than, move right + * - greater than, move left + * - equal, we're done + */ + diff = cur->bc_ops->key_diff(cur, kp); + if (diff < 0) + low = keyno + 1; + else if (diff > 0) + high = keyno - 1; + else + break; + } + } + + /* + * If there are more levels, set up for the next level + * by getting the block number and filling in the cursor. + */ + if (level > 0) { + /* + * If we moved left, need the previous key number, + * unless there isn't one. + */ + if (diff > 0 && --keyno < 1) + keyno = 1; + pp = xfs_btree_ptr_addr(cur, keyno, block); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, pp, 0, level); + if (error) + goto error0; +#endif + cur->bc_ptrs[level] = keyno; + } + } + + /* Done with the search. See if we need to adjust the results. */ + if (dir != XFS_LOOKUP_LE && diff < 0) { + keyno++; + /* + * If ge search and we went off the end of the block, but it's + * not the last block, we're in the wrong block. + */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (dir == XFS_LOOKUP_GE && + keyno > xfs_btree_get_numrecs(block) && + !xfs_btree_ptr_is_null(cur, &ptr)) { + int i; + + cur->bc_ptrs[0] = keyno; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + } + } else if (dir == XFS_LOOKUP_LE && diff > 0) + keyno--; + cur->bc_ptrs[0] = keyno; + + /* Return if we succeeded or not. */ + if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) + *stat = 0; + else if (dir != XFS_LOOKUP_EQ || diff == 0) + *stat = 1; + else + *stat = 0; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 52b2da6ab32..c151175a5fd 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -190,6 +190,16 @@ struct xfs_btree_ops { /* records in block/level */ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); + /* init values of btree structures */ + void (*init_key_from_rec)(union xfs_btree_key *key, + union xfs_btree_rec *rec); + void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); + + /* difference between key value and cursor value */ + __int64_t (*key_diff)(struct xfs_btree_cur *cur, + union xfs_btree_key *key); + /* btree tracing */ #ifdef XFS_BTREE_TRACE void (*trace_enter)(struct xfs_btree_cur *, const char *, @@ -507,6 +517,7 @@ xfs_btree_setbuf( */ int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); +int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index d36b42bf3ff..bbf537f64c4 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -118,6 +118,59 @@ xfs_ialloc_cluster_alignment( return 1; } +/* + * Lookup the record equal to ino in the btree given by cur. + */ +STATIC int /* error */ +xfs_inobt_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} + /* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 4e30ec1d13b..4026578bc26 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -154,6 +154,21 @@ xfs_ialloc_pagi_init( struct xfs_trans *tp, /* transaction pointer */ xfs_agnumber_t agno); /* allocation group number */ +/* + * Lookup the first record greater than or equal to ino + * in the btree given by cur. + */ +int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, + __int32_t fcnt, xfs_inofree_t free, int *stat); + +/* + * Lookup the first record less than or equal to ino + * in the btree given by cur. + */ +int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, + __int32_t fcnt, xfs_inofree_t free, int *stat); + + #endif /* __KERNEL__ */ #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 9099a32f997..161c3b2e245 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -828,212 +828,6 @@ xfs_inobt_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Lookup the record. The cursor is made to point to it, based on dir. - * Return 0 if can't find any such record, 1 for success. - */ -STATIC int /* error */ -xfs_inobt_lookup( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_lookup_t dir, /* <=, ==, or >= */ - int *stat) /* success/failure */ -{ - xfs_agblock_t agbno; /* a.g. relative btree block number */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_inobt_block_t *block=NULL; /* current btree block */ - __int64_t diff; /* difference for the current key */ - int error; /* error return value */ - int keyno=0; /* current key number */ - int level; /* level in the btree */ - xfs_mount_t *mp; /* file system mount point */ - - /* - * Get the allocation group header, and the root block number. - */ - mp = cur->bc_mp; - { - xfs_agi_t *agi; /* a.g. inode header */ - - agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); - agno = be32_to_cpu(agi->agi_seqno); - agbno = be32_to_cpu(agi->agi_root); - } - /* - * Iterate over each level in the btree, starting at the root. - * For each level above the leaves, find the key we need, based - * on the lookup record, then follow the corresponding block - * pointer down to the next level. - */ - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - xfs_buf_t *bp; /* buffer pointer for btree block */ - xfs_daddr_t d; /* disk address of btree block */ - - /* - * Get the disk address we're looking for. - */ - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - /* - * If the old buffer at this level is for a different block, - * throw it away, otherwise just use it. - */ - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) != d) - bp = NULL; - if (!bp) { - /* - * Need to get a new buffer. Read it, then - * set it in the cursor, releasing the old one. - */ - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - agno, agbno, 0, &bp, XFS_INO_BTREE_REF))) - return error; - xfs_btree_setbuf(cur, level, bp); - /* - * Point to the btree block, now that we have the buffer - */ - block = XFS_BUF_TO_INOBT_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, level, - bp))) - return error; - } else - block = XFS_BUF_TO_INOBT_BLOCK(bp); - /* - * If we already had a key match at a higher level, we know - * we need to use the first entry in this block. - */ - if (diff == 0) - keyno = 1; - /* - * Otherwise we need to search this block. Do a binary search. - */ - else { - int high; /* high entry number */ - xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */ - xfs_inobt_rec_t *krbase=NULL;/* base of records in block */ - int low; /* low entry number */ - - /* - * Get a pointer to keys or records. - */ - if (level > 0) - kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur); - else - krbase = XFS_INOBT_REC_ADDR(block, 1, cur); - /* - * Set low and high entry numbers, 1-based. - */ - low = 1; - if (!(high = be16_to_cpu(block->bb_numrecs))) { - /* - * If the block is empty, the tree must - * be an empty leaf. - */ - ASSERT(level == 0 && cur->bc_nlevels == 1); - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - *stat = 0; - return 0; - } - /* - * Binary search the block. - */ - while (low <= high) { - xfs_agino_t startino; /* key value */ - - /* - * keyno is average of low and high. - */ - keyno = (low + high) >> 1; - /* - * Get startino. - */ - if (level > 0) { - xfs_inobt_key_t *kkp; - - kkp = kkbase + keyno - 1; - startino = be32_to_cpu(kkp->ir_startino); - } else { - xfs_inobt_rec_t *krp; - - krp = krbase + keyno - 1; - startino = be32_to_cpu(krp->ir_startino); - } - /* - * Compute difference to get next direction. - */ - diff = (__int64_t) - startino - cur->bc_rec.i.ir_startino; - /* - * Less than, move right. - */ - if (diff < 0) - low = keyno + 1; - /* - * Greater than, move left. - */ - else if (diff > 0) - high = keyno - 1; - /* - * Equal, we're done. - */ - else - break; - } - } - /* - * If there are more levels, set up for the next level - * by getting the block number and filling in the cursor. - */ - if (level > 0) { - /* - * If we moved left, need the previous key number, - * unless there isn't one. - */ - if (diff > 0 && --keyno < 1) - keyno = 1; - agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, agbno, level))) - return error; -#endif - cur->bc_ptrs[level] = keyno; - } - } - /* - * Done with the search. - * See if we need to adjust the results. - */ - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - if (dir == XFS_LOOKUP_GE && - keyno > be16_to_cpu(block->bb_numrecs) && - be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) { - int i; - - cur->bc_ptrs[0] = keyno; - if ((error = xfs_btree_increment(cur, 0, &i))) - return error; - ASSERT(i == 1); - *stat = 1; - return 0; - } - } - else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - /* - * Return if we succeeded or not. - */ - if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) - *stat = 0; - else - *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); - return 0; -} - /* * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. @@ -1797,59 +1591,6 @@ xfs_inobt_insert( return 0; } -/* - * Lookup the record equal to ino in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_eq( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_ge( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat); -} - -/* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_le( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat); -} - /* * Update the record referred to by cur, to the value given * by [ino, fcnt, free]. @@ -1918,6 +1659,38 @@ xfs_inobt_get_maxrecs( return cur->bc_mp->m_inobt_mxr[level != 0]; } +STATIC void +xfs_inobt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->inobt.ir_startino = rec->inobt.ir_startino; +} + +/* + * intial value of ptr for lookup + */ +STATIC void +xfs_inobt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + + ptr->s = agi->agi_root; +} + +STATIC __int64_t +xfs_inobt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + return (__int64_t)be32_to_cpu(key->inobt.ir_startino) - + cur->bc_rec.i.ir_startino; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -1990,6 +1763,9 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .get_maxrecs = xfs_inobt_get_maxrecs, + .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, + .key_diff = xfs_inobt_key_diff, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_inobt_trace_enter, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 84554595d28..674b459521f 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -135,26 +135,6 @@ extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, */ extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat); -/* - * Lookup the record equal to ino in the btree given by cur. - */ -extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. - */ -extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - /* * Update the record referred to by cur, to the value given * by [ino, fcnt, free]. -- cgit v1.2.3 From 38bb74237d2d94c1aced2ec626d7d0f317e360da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:22 +1100 Subject: [XFS] implement generic xfs_btree_updkey From: Dave Chinner Note that there are many > 80 char lines introduced due to the xfs_btree_key casts. But the places where this happens is throw-away code once the whole btree code gets merged into a common implementation. The same is true for the temporary xfs_alloc_log_keys define to the new name. All old users will be gone after a few patches. [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32193a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 50 +++------------------------ fs/xfs/xfs_bmap_btree.c | 49 +++----------------------- fs/xfs/xfs_btree.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 1 + fs/xfs/xfs_ialloc_btree.c | 50 +++------------------------ 5 files changed, 103 insertions(+), 134 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b81fbf1216e..28c6a698f56 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -52,7 +52,6 @@ STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_alloc_key_t *, xfs_btree_cur_t **, int *); -STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int); /* * Internal functions. @@ -265,7 +264,7 @@ xfs_alloc_delrec( * If we deleted the leftmost entry in the block, update the * key values above us in the tree. */ - if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1))) + if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)lkp, level + 1))) return error; /* * If the number of records remaining in the block is at least @@ -798,7 +797,7 @@ xfs_alloc_insrec( /* * If we inserted at the start of a block, update the parents' keys. */ - if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1))) + if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) return error; /* * Look to see if the longest extent in the allocation group @@ -1068,7 +1067,7 @@ xfs_alloc_lshift( /* * Update the parent key values of right. */ - if ((error = xfs_alloc_updkey(cur, rkp, level + 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) return error; /* * Slide the cursor value left one. @@ -1354,7 +1353,7 @@ xfs_alloc_rshift( i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_alloc_updkey(tcur, rkp, level + 1))) + (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) goto error0; xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); *stat = 1; @@ -1519,45 +1518,6 @@ xfs_alloc_split( return 0; } -/* - * Update keys at all levels from here to the root along the cursor's path. - */ -STATIC int /* error */ -xfs_alloc_updkey( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_alloc_key_t *keyp, /* new key value to update to */ - int level) /* starting level for update */ -{ - int ptr; /* index of key in block */ - - /* - * Go up the tree from this level toward the root. - * At each level, update the key value to the value input. - * Stop when we reach a level where the cursor isn't pointing - * at the first entry in the block. - */ - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { - xfs_alloc_block_t *block; /* btree block */ - xfs_buf_t *bp; /* buffer for block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - xfs_alloc_key_t *kp; /* ptr to btree block keys */ - - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); - *kp = *keyp; - xfs_alloc_log_keys(cur, bp, ptr, ptr); - } - return 0; -} - /* * Externally visible routines. */ @@ -1765,7 +1725,7 @@ xfs_alloc_update( key.ar_startblock = cpu_to_be32(bno); key.ar_blockcount = cpu_to_be32(len); - if ((error = xfs_alloc_updkey(cur, &key, 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) return error; } return 0; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 8403d154ae0..0a56257b7fd 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -56,7 +56,6 @@ STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); -STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int); #undef EXIT @@ -211,7 +210,7 @@ xfs_bmbt_delrec( *stat = 1; return 0; } - if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) { + if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -635,7 +634,7 @@ xfs_bmbt_insrec( kp + ptr); } #endif - if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) { + if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -935,7 +934,7 @@ xfs_bmbt_lshift( key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); rkp = &key; } - if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) { + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1067,7 +1066,7 @@ xfs_bmbt_rshift( goto error1; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) { + if ((error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { XFS_BMBT_TRACE_CURSOR(tcur, ERROR); goto error1; } @@ -1276,44 +1275,6 @@ xfs_bmbt_split( return 0; } - -/* - * Update keys for the record. - */ -STATIC int -xfs_bmbt_updkey( - xfs_btree_cur_t *cur, - xfs_bmbt_key_t *keyp, /* on-disk format */ - int level) -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; -#ifdef DEBUG - int error; -#endif - xfs_bmbt_key_t *kp; - int ptr; - - ASSERT(level >= 1); - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIK(cur, level, keyp); - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { - block = xfs_bmbt_get_block(cur, level, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[level]; - kp = XFS_BMAP_KEY_IADDR(block, ptr, cur); - *kp = *keyp; - xfs_bmbt_log_keys(cur, bp, ptr, ptr); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; -} - /* * Convert on-disk form of btree root to in-memory form. */ @@ -2039,7 +2000,7 @@ xfs_bmbt_update( return 0; } key.br_startoff = cpu_to_be64(off); - if ((error = xfs_bmbt_updkey(cur, &key, 1))) { + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 41912a01bec..1459a2b9a72 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -34,6 +34,7 @@ #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_inode_item.h" #include "xfs_btree.h" #include "xfs_btree_trace.h" #include "xfs_ialloc.h" @@ -1064,6 +1065,45 @@ xfs_btree_read_buf_block( return error; } +/* + * Copy keys from one btree block to another. + */ +STATIC void +xfs_btree_copy_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *dst_key, + union xfs_btree_key *src_key, + int numkeys) +{ + ASSERT(numkeys >= 0); + memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); +} + +/* + * Log key values from the btree block. + */ +STATIC void +xfs_btree_log_keys( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int first, + int last) +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + if (bp) { + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_key_offset(cur, first), + xfs_btree_key_offset(cur, last + 1) - 1); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + /* * Increment cursor by one record at the level. * For nonzero levels the leaf-ward information is untouched. @@ -1489,3 +1529,50 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Update keys at all levels from here to the root along the cursor's path. + */ +int +xfs_btree_updkey( + struct xfs_btree_cur *cur, + union xfs_btree_key *keyp, + int level) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + union xfs_btree_key *kp; + int ptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIK(cur, level, keyp); + + ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); + + /* + * Go up the tree from this level toward the root. + * At each level, update the key value to the value input. + * Stop when we reach a level where the cursor isn't pointing + * at the first entry in the block. + */ + for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { +#ifdef DEBUG + int error; +#endif + block = xfs_btree_get_block(cur, level, &bp); +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } +#endif + ptr = cur->bc_ptrs[level]; + kp = xfs_btree_key_addr(cur, ptr, block); + xfs_btree_copy_keys(cur, kp, keyp, 1); + xfs_btree_log_keys(cur, bp, ptr, ptr); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index c151175a5fd..ac3f527b0ac 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -518,6 +518,7 @@ xfs_btree_setbuf( int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); +int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 161c3b2e245..cd8bb519cb5 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -48,7 +48,6 @@ STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_inobt_key_t *, xfs_btree_cur_t **, int *); -STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int); /* * Single level of the xfs_inobt_delete record deletion routine. @@ -214,7 +213,7 @@ xfs_inobt_delrec( * If we deleted the leftmost entry in the block, update the * key values above us in the tree. */ - if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1))) + if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) return error; /* * If the number of records remaining in the block is at least @@ -723,7 +722,7 @@ xfs_inobt_insrec( /* * If we inserted at the start of a block, update the parents' keys. */ - if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1))) + if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) return error; /* * Return the new block number, if any. @@ -960,7 +959,7 @@ xfs_inobt_lshift( /* * Update the parent key values of right. */ - if ((error = xfs_inobt_updkey(cur, rkp, level + 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) return error; /* * Slide the cursor value left one. @@ -1238,7 +1237,7 @@ xfs_inobt_rshift( return error; xfs_btree_lastrec(tcur, level); if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_inobt_updkey(tcur, rkp, level + 1))) { + (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } @@ -1406,45 +1405,6 @@ xfs_inobt_split( return 0; } -/* - * Update keys at all levels from here to the root along the cursor's path. - */ -STATIC int /* error */ -xfs_inobt_updkey( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_inobt_key_t *keyp, /* new key value to update to */ - int level) /* starting level for update */ -{ - int ptr; /* index of key in block */ - - /* - * Go up the tree from this level toward the root. - * At each level, update the key value to the value input. - * Stop when we reach a level where the cursor isn't pointing - * at the first entry in the block. - */ - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { - xfs_buf_t *bp; /* buffer for block */ - xfs_inobt_block_t *block; /* btree block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - xfs_inobt_key_t *kp; /* ptr to btree block keys */ - - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - kp = XFS_INOBT_KEY_ADDR(block, ptr, cur); - *kp = *keyp; - xfs_inobt_log_keys(cur, bp, ptr, ptr); - } - return 0; -} - /* * Externally visible routines. */ @@ -1637,7 +1597,7 @@ xfs_inobt_update( xfs_inobt_key_t key; /* key containing [ino] */ key.ir_startino = cpu_to_be32(ino); - if ((error = xfs_inobt_updkey(cur, &key, 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) return error; } return 0; -- cgit v1.2.3 From 278d0ca14e889c3932a05d1a68675252a12b3466 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:32 +1100 Subject: [XFS] implement generic xfs_btree_update From: Dave Chinner The most complicated part here is the lastrec tracking for the alloc btree. Most logic is in the update_lastrec method which has to do some hopefully good enough dirty magic to maintain it. [hch: split out from bigger patch and a rework of the lastrec logic] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32194a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 17 +++++++ fs/xfs/xfs_alloc_btree.c | 106 +++++++++++++++------------------------- fs/xfs/xfs_alloc_btree.h | 7 --- fs/xfs/xfs_bmap.c | 18 +++++++ fs/xfs/xfs_bmap_btree.c | 45 ----------------- fs/xfs/xfs_bmap_btree.h | 2 - fs/xfs/xfs_btree.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 14 ++++++ fs/xfs/xfs_ialloc.c | 20 ++++++++ fs/xfs/xfs_ialloc_btree.c | 52 -------------------- fs/xfs/xfs_ialloc_btree.h | 8 --- 11 files changed, 228 insertions(+), 182 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 6bda0ae26c2..875e1bae194 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -136,6 +136,23 @@ xfs_alloc_lookup_le( return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } +/* + * Update the record referred to by cur to the value given + * by [bno, len]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int /* error */ +xfs_alloc_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len) /* length of extent */ +{ + union xfs_btree_rec rec; + + rec.alloc.ar_startblock = cpu_to_be32(bno); + rec.alloc.ar_blockcount = cpu_to_be32(len); + return xfs_btree_update(cur, &rec); +} /* * Compute aligned version of the found extent. diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 28c6a698f56..c5c32999b81 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -1661,83 +1661,50 @@ xfs_alloc_insert( return 0; } +STATIC struct xfs_btree_cur * +xfs_allocbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); +} + /* - * Update the record referred to by cur, to the value given by [bno, len]. - * This either works (return 0) or gets an EFSCORRUPTED error. + * Update the longest extent in the AGF */ -int /* error */ -xfs_alloc_update( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len) /* length of extent */ +STATIC void +xfs_allocbt_update_lastrec( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_rec *rec, + int ptr, + int reason) { - xfs_alloc_block_t *block; /* btree block to update */ - int error; /* error return value */ - int ptr; /* current record number (updating) */ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + __be32 len; - ASSERT(len > 0); - /* - * Pick up the a.g. freelist struct and the current block. - */ - block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) - return error; -#endif - /* - * Get the address of the rec to be updated. - */ - ptr = cur->bc_ptrs[0]; - { - xfs_alloc_rec_t *rp; /* pointer to updated record */ + ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); - rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); + switch (reason) { + case LASTREC_UPDATE: /* - * Fill in the new contents and log them. + * If this is the last leaf block and it's the last record, + * then update the size of the longest extent in the AG. */ - rp->ar_startblock = cpu_to_be32(bno); - rp->ar_blockcount = cpu_to_be32(len); - xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr); - } - /* - * If it's the by-size btree and it's the last leaf block and - * it's the last record... then update the size of the longest - * extent in the a.g., which we cache in the a.g. freelist header. - */ - if (cur->bc_btnum == XFS_BTNUM_CNT && - be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - ptr == be16_to_cpu(block->bb_numrecs)) { - xfs_agf_t *agf; /* a.g. freespace header */ - xfs_agnumber_t seqno; - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - seqno = be32_to_cpu(agf->agf_seqno); - cur->bc_mp->m_perag[seqno].pagf_longest = len; - agf->agf_longest = cpu_to_be32(len); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_LONGEST); - } - /* - * Updating first record in leaf. Pass new key value up to our parent. - */ - if (ptr == 1) { - xfs_alloc_key_t key; /* key containing [bno, len] */ - - key.ar_startblock = cpu_to_be32(bno); - key.ar_blockcount = cpu_to_be32(len); - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) - return error; + if (ptr != xfs_btree_get_numrecs(block)) + return; + len = rec->alloc.ar_blockcount; + break; + default: + ASSERT(0); + return; } - return 0; -} -STATIC struct xfs_btree_cur * -xfs_allocbt_dup_cursor( - struct xfs_btree_cur *cur) -{ - return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, - cur->bc_btnum); + agf->agf_longest = len; + cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); } STATIC int @@ -1864,6 +1831,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .key_len = sizeof(xfs_alloc_key_t), .dup_cursor = xfs_allocbt_dup_cursor, + .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, @@ -1902,6 +1870,8 @@ xfs_allocbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_allocbt_ops; + if (btnum == XFS_BTNUM_CNT) + cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index aa110ff4feb..81e2f360781 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -113,13 +113,6 @@ extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, */ extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat); -/* - * Update the record referred to by cur, to the value given by [bno, len]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len); - extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1296b4102e9..7d6c4ace805 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -430,6 +430,24 @@ xfs_bmbt_lookup_ge( return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } +/* +* Update the record referred to by cur to the value given + * by [off, bno, len, state]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int +xfs_bmbt_update( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + xfs_exntst_t state) +{ + union xfs_btree_rec rec; + + xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); + return xfs_btree_update(cur, &rec); +} /* * Called from xfs_bmap_add_attrfork to handle btree format files. diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 0a56257b7fd..99200a9898f 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -1963,51 +1963,6 @@ xfs_bmbt_to_bmdr( memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } -/* - * Update the record to the passed values. - */ -int -xfs_bmbt_update( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - xfs_exntst_t state) -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; - int error; - xfs_bmbt_key_t key; - int ptr; - xfs_bmbt_rec_t *rp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno, - (xfs_dfilblks_t)len, (int)state); - block = xfs_bmbt_get_block(cur, 0, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[0]; - rp = XFS_BMAP_REC_IADDR(block, ptr, cur); - xfs_bmbt_disk_set_allf(rp, off, bno, len, state); - xfs_bmbt_log_recs(cur, bp, ptr, ptr); - if (ptr > 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - key.br_startoff = cpu_to_be64(off); - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; -} - /* * Check extent records, which have just been read, for * any bit in the extent flag field. ASSERT on debug diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index d04198cdc4b..6bfd62ec54f 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -274,8 +274,6 @@ extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); -extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, - xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 1459a2b9a72..205272f282d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -988,6 +988,30 @@ xfs_btree_get_sibling( } } +/* + * Return true if ptr is the last record in the btree and + * we need to track updateѕ to this record. The decision + * will be further refined in the update_lastrec method. + */ +STATIC int +xfs_btree_is_lastrec( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level) +{ + union xfs_btree_ptr ptr; + + if (level > 0) + return 0; + if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) + return 0; + + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &ptr)) + return 0; + return 1; +} + STATIC xfs_daddr_t xfs_btree_ptr_to_daddr( struct xfs_btree_cur *cur, @@ -1079,6 +1103,20 @@ xfs_btree_copy_keys( memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); } +/* + * Copy records from one btree block to another. + */ +STATIC void +xfs_btree_copy_recs( + struct xfs_btree_cur *cur, + union xfs_btree_rec *dst_rec, + union xfs_btree_rec *src_rec, + int numrecs) +{ + ASSERT(numrecs >= 0); + memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); +} + /* * Log key values from the btree block. */ @@ -1104,6 +1142,26 @@ xfs_btree_log_keys( XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } +/* + * Log record values from the btree block. + */ +STATIC void +xfs_btree_log_recs( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int first, + int last) +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_rec_offset(cur, first), + xfs_btree_rec_offset(cur, last + 1) - 1); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + /* * Increment cursor by one record at the level. * For nonzero levels the leaf-ward information is untouched. @@ -1576,3 +1634,66 @@ xfs_btree_updkey( XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } + +/* + * Update the record referred to by cur to the value in the + * given record. This either works (return 0) or gets an + * EFSCORRUPTED error. + */ +int +xfs_btree_update( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + int error; + int ptr; + union xfs_btree_rec *rp; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGR(cur, rec); + + /* Pick up the current block. */ + block = xfs_btree_get_block(cur, 0, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, 0, bp); + if (error) + goto error0; +#endif + /* Get the address of the rec to be updated. */ + ptr = cur->bc_ptrs[0]; + rp = xfs_btree_rec_addr(cur, ptr, block); + + /* Fill in the new contents and log them. */ + xfs_btree_copy_recs(cur, rp, rec, 1); + xfs_btree_log_recs(cur, bp, ptr, ptr); + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, 0)) { + cur->bc_ops->update_lastrec(cur, block, rec, + ptr, LASTREC_UPDATE); + } + + /* Updating first rec in leaf. Pass new key value up to our parent. */ + if (ptr == 1) { + union xfs_btree_key key; + + cur->bc_ops->init_key_from_rec(&key, rec); + error = xfs_btree_updkey(cur, &key, 1); + if (error) + goto error0; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index ac3f527b0ac..c3bfa5556c1 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -187,6 +187,12 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* update last record information */ + void (*update_lastrec)(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_rec *rec, + int ptr, int reason); + /* records in block/level */ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); @@ -220,6 +226,12 @@ struct xfs_btree_ops { #endif }; +/* + * Reasons for the update_lastrec method to be called. + */ +#define LASTREC_UPDATE 0 + + /* * Btree cursor structure. * This collects all information needed by the btree code in one place. @@ -264,6 +276,7 @@ typedef struct xfs_btree_cur /* cursor flags */ #define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ #define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ +#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ #define XFS_BTREE_NOERROR 0 @@ -519,6 +532,7 @@ int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); +int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index bbf537f64c4..138651afd44 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -171,6 +171,26 @@ xfs_inobt_lookup_le( return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } +/* + * Update the record referred to by cur to the value given + * by [ino, fcnt, free]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int /* error */ +xfs_inobt_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free) /* free inode mask */ +{ + union xfs_btree_rec rec; + + rec.inobt.ir_startino = cpu_to_be32(ino); + rec.inobt.ir_freecount = cpu_to_be32(fcnt); + rec.inobt.ir_free = cpu_to_be64(free); + return xfs_btree_update(cur, &rec); +} + /* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index cd8bb519cb5..d080a6833a8 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -1551,58 +1551,6 @@ xfs_inobt_insert( return 0; } -/* - * Update the record referred to by cur, to the value given - * by [ino, fcnt, free]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -int /* error */ -xfs_inobt_update( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free) /* free inode mask */ -{ - xfs_inobt_block_t *block; /* btree block to update */ - xfs_buf_t *bp; /* buffer containing btree block */ - int error; /* error return value */ - int ptr; /* current record number (updating) */ - xfs_inobt_rec_t *rp; /* pointer to updated record */ - - /* - * Pick up the current block. - */ - bp = cur->bc_bufs[0]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, bp))) - return error; -#endif - /* - * Get the address of the rec to be updated. - */ - ptr = cur->bc_ptrs[0]; - rp = XFS_INOBT_REC_ADDR(block, ptr, cur); - /* - * Fill in the new contents and log them. - */ - rp->ir_startino = cpu_to_be32(ino); - rp->ir_freecount = cpu_to_be32(fcnt); - rp->ir_free = cpu_to_be64(free); - xfs_inobt_log_recs(cur, bp, ptr, ptr); - /* - * Updating first record in leaf. Pass new key value up to our parent. - */ - if (ptr == 1) { - xfs_inobt_key_t key; /* key containing [ino] */ - - key.ir_startino = cpu_to_be32(ino); - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) - return error; - } - return 0; -} - STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 674b459521f..7f77549e82a 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -135,14 +135,6 @@ extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, */ extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat); -/* - * Update the record referred to by cur, to the value given - * by [ino, fcnt, free]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free); - extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); -- cgit v1.2.3 From 9eaead51bed957af0070a277d945744a76df0c8b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:43 +1100 Subject: [XFS] implement generic xfs_btree_rshift Make the btree right shift code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32196a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 136 +------------------- fs/xfs/xfs_bmap_btree.c | 142 +-------------------- fs/xfs/xfs_btree.c | 319 +++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_btree.h | 7 + fs/xfs/xfs_ialloc_btree.c | 135 +------------------- 5 files changed, 331 insertions(+), 408 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index c5c32999b81..31e42891fc9 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -49,7 +49,6 @@ STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_alloc_key_t *, xfs_btree_cur_t **, int *); @@ -391,7 +390,7 @@ xfs_alloc_delrec( */ if (be16_to_cpu(left->bb_numrecs) - 1 >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_alloc_rshift(tcur, level, &i))) + if ((error = xfs_btree_rshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -683,7 +682,7 @@ xfs_alloc_insrec( /* * First, try shifting an entry to the right neighbor. */ - if ((error = xfs_alloc_rshift(cur, level, &i))) + if ((error = xfs_btree_rshift(cur, level, &i))) return error; if (i) { /* nothing */ @@ -1232,137 +1231,6 @@ xfs_alloc_newroot( return 0; } -/* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_alloc_rshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop index */ - xfs_alloc_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left (current) block */ - xfs_alloc_block_t *left; /* left (current) btree block */ - xfs_buf_t *rbp; /* buffer for right neighbor block */ - xfs_alloc_block_t *right; /* right neighbor btree block */ - xfs_alloc_key_t *rkp; /* key pointer for right block */ - xfs_btree_cur_t *tcur; /* temporary cursor */ - - /* - * Set up variables for this block as "left". - */ - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * If we've got no right sibling then we can't shift an entry right. - */ - if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) { - *stat = 0; - return 0; - } - /* - * Set up the right neighbor as "right". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), - 0, &rbp, XFS_ALLOC_BTREE_REF))) - return error; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - /* - * Make a hole at the start of the right neighbor block, then - * copy the last left block entry to the hole. - */ - if (level > 0) { - xfs_alloc_key_t *lkp; /* key pointer for left block */ - xfs_alloc_ptr_t *lpp; /* address pointer for left block */ - xfs_alloc_ptr_t *rpp; /* address pointer for right block */ - - lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) - return error; -#endif - *rkp = *lkp; - *rpp = *lpp; - xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); - } else { - xfs_alloc_rec_t *lrp; /* record pointer for left block */ - xfs_alloc_rec_t *rrp; /* record pointer for right block */ - - lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - *rrp = *lrp; - xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.ar_startblock = rrp->ar_startblock; - key.ar_blockcount = rrp->ar_blockcount; - rkp = &key; - xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1); - } - /* - * Decrement and log left's numrecs, bump and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, -1); - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, 1); - xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Using a temporary cursor, update the parent key values of the - * block on the right. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) - goto error0; - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - *stat = 1; - return 0; -error0: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - /* * Split cur/level block in half. * Return new block number and its first record (to be inserted into parent). diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 99200a9898f..9d18fa8aa1d 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -53,7 +53,6 @@ STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); -STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); @@ -327,7 +326,7 @@ xfs_bmbt_delrec( bno = be64_to_cpu(left->bb_rightsib); if (be16_to_cpu(left->bb_numrecs) - 1 >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_bmbt_rshift(tcur, level, &i))) { + if ((error = xfs_btree_rshift(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -538,7 +537,7 @@ xfs_bmbt_insrec( logflags); block = xfs_bmbt_get_block(cur, level, &bp); } else { - if ((error = xfs_bmbt_rshift(cur, level, &i))) { + if ((error = xfs_btree_rshift(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -944,143 +943,6 @@ xfs_bmbt_lshift( return 0; } -/* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_bmbt_rshift( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop counter */ - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - xfs_bmbt_rec_t *lrp; /* left record pointer */ - xfs_mount_t *mp; /* file system mount point */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp; /* right btree key */ - xfs_bmbt_ptr_t *rpp; /* right address pointer */ - xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */ - struct xfs_btree_cur *tcur; /* temporary btree cursor */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - if (level == cur->bc_nlevels - 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_BMBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - mp = cur->bc_mp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0, - &rbp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - right = XFS_BUF_TO_BMBT_BLOCK(rbp); - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur); - lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { - if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - *rkp = *lkp; - *rpp = *lpp; - xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - } else { - lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - *rrp = *lrp; - xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); - rkp = &key; - } - be16_add_cpu(&left->bb_numrecs, -1); - xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, 1); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1); - else - xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1); -#endif - xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS); - if ((error = xfs_btree_dup_cursor(cur, &tcur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(tcur, ERROR); - goto error1; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { - XFS_BMBT_TRACE_CURSOR(tcur, ERROR); - goto error1; - } - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -error0: - XFS_BMBT_TRACE_CURSOR(cur, ERROR); -error1: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - /* * Determine the extent state. */ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 205272f282d..e1a21378184 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1117,6 +1117,77 @@ xfs_btree_copy_recs( memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); } +/* + * Copy block pointers from one btree block to another. + */ +STATIC void +xfs_btree_copy_ptrs( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *dst_ptr, + union xfs_btree_ptr *src_ptr, + int numptrs) +{ + ASSERT(numptrs >= 0); + memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); +} + +/* + * Shift keys one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + int dir, + int numkeys) +{ + char *dst_key; + + ASSERT(numkeys >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_key = (char *)key + (dir * cur->bc_ops->key_len); + memmove(dst_key, key, numkeys * cur->bc_ops->key_len); +} + +/* + * Shift records one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_recs( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int dir, + int numrecs) +{ + char *dst_rec; + + ASSERT(numrecs >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); + memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); +} + +/* + * Shift block pointers one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_ptrs( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int dir, + int numptrs) +{ + char *dst_ptr; + + ASSERT(numptrs >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); + memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); +} + /* * Log key values from the btree block. */ @@ -1162,6 +1233,79 @@ xfs_btree_log_recs( XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); } +/* + * Log block pointer fields from a btree block (nonleaf). + */ +STATIC void +xfs_btree_log_ptrs( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_buf *bp, /* buffer containing btree block */ + int first, /* index of first pointer to log */ + int last) /* index of last pointer to log */ +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + if (bp) { + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + int level = xfs_btree_get_level(block); + + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_ptr_offset(cur, first, level), + xfs_btree_ptr_offset(cur, last + 1, level) - 1); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* + * Log fields from a btree block header. + */ +STATIC void +xfs_btree_log_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_buf *bp, /* buffer containing btree block */ + int fields) /* mask of fields: XFS_BB_... */ +{ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + static const short soffsets[] = { /* table of offsets (short) */ + offsetof(struct xfs_btree_sblock, bb_magic), + offsetof(struct xfs_btree_sblock, bb_level), + offsetof(struct xfs_btree_sblock, bb_numrecs), + offsetof(struct xfs_btree_sblock, bb_leftsib), + offsetof(struct xfs_btree_sblock, bb_rightsib), + sizeof(struct xfs_btree_sblock) + }; + static const short loffsets[] = { /* table of offsets (long) */ + offsetof(struct xfs_btree_lblock, bb_magic), + offsetof(struct xfs_btree_lblock, bb_level), + offsetof(struct xfs_btree_lblock, bb_numrecs), + offsetof(struct xfs_btree_lblock, bb_leftsib), + offsetof(struct xfs_btree_lblock, bb_rightsib), + sizeof(struct xfs_btree_lblock) + }; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBI(cur, bp, fields); + + if (bp) { + xfs_btree_offsets(fields, + (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + loffsets : soffsets, + XFS_BB_NUM_BITS, &first, &last); + xfs_trans_log_buf(cur->bc_tp, bp, first, last); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + /* * Increment cursor by one record at the level. * For nonzero levels the leaf-ward information is untouched. @@ -1368,7 +1512,6 @@ error0: return error; } - STATIC int xfs_btree_lookup_get_block( struct xfs_btree_cur *cur, /* btree cursor */ @@ -1697,3 +1840,177 @@ error0: return error; } +/* + * Move 1 record right from cur/level if possible. + * Update cur to reflect the new path. + */ +int /* error */ +xfs_btree_rshift( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + union xfs_btree_key key; /* btree key */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + struct xfs_btree_cur *tcur; /* temporary btree cursor */ + union xfs_btree_ptr rptr; /* right block pointer */ + union xfs_btree_key *rkp; /* right btree key */ + int rrecs; /* right record count */ + int lrecs; /* left record count */ + int error; /* error return value */ + int i; /* loop counter */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) + goto out0; + + /* Set up variables for this block as "left". */ + left = xfs_btree_get_block(cur, level, &lbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + + /* If we've got no right sibling then we can't shift an entry right. */ + xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &rptr)) + goto out0; + + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + lrecs = xfs_btree_get_numrecs(left); + if (cur->bc_ptrs[level] >= lrecs) + goto out0; + + /* Set up the right neighbor as "right". */ + error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); + if (error) + goto error0; + + /* If it's full, it can't take another entry. */ + rrecs = xfs_btree_get_numrecs(right); + if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) + goto out0; + + XFS_BTREE_STATS_INC(cur, rshift); + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + + /* + * Make a hole at the start of the right neighbor block, then + * copy the last left block entry to the hole. + */ + if (level > 0) { + /* It's a nonleaf. make a hole in the keys and ptrs */ + union xfs_btree_key *lkp; + union xfs_btree_ptr *lpp; + union xfs_btree_ptr *rpp; + + lkp = xfs_btree_key_addr(cur, lrecs, left); + lpp = xfs_btree_ptr_addr(cur, lrecs, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); + +#ifdef DEBUG + for (i = rrecs - 1; i >= 0; i--) { + error = xfs_btree_check_ptr(cur, rpp, i, level); + if (error) + goto error0; + } +#endif + + xfs_btree_shift_keys(cur, rkp, 1, rrecs); + xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, lpp, 0, level); + if (error) + goto error0; +#endif + + /* Now put the new data in, and log it. */ + xfs_btree_copy_keys(cur, rkp, lkp, 1); + xfs_btree_copy_ptrs(cur, rpp, lpp, 1); + + xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); + + xfs_btree_check_key(cur->bc_btnum, rkp, + xfs_btree_key_addr(cur, 2, right)); + } else { + /* It's a leaf. make a hole in the records */ + union xfs_btree_rec *lrp; + union xfs_btree_rec *rrp; + + lrp = xfs_btree_rec_addr(cur, lrecs, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_shift_recs(cur, rrp, 1, rrecs); + + /* Now put the new data in, and log it. */ + xfs_btree_copy_recs(cur, rrp, lrp, 1); + xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); + + cur->bc_ops->init_key_from_rec(&key, rrp); + rkp = &key; + + xfs_btree_check_rec(cur->bc_btnum, rrp, + xfs_btree_rec_addr(cur, 2, right)); + } + + /* + * Decrement and log left's numrecs, bump and log right's numrecs. + */ + xfs_btree_set_numrecs(left, --lrecs); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); + + xfs_btree_set_numrecs(right, ++rrecs); + xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); + + /* + * Using a temporary cursor, update the parent key values of the + * block on the right. + */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_increment(tcur, level, &i); + if (error) + goto error1; + + error = xfs_btree_updkey(tcur, rkp, level + 1); + if (error) + goto error1; + + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + +error1: + XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index c3bfa5556c1..04311dbeff1 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -533,6 +533,7 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); +int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); /* * Helpers. @@ -542,6 +543,12 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) return be16_to_cpu(block->bb_numrecs); } +static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block, + __uint16_t numrecs) +{ + block->bb_numrecs = cpu_to_be16(numrecs); +} + static inline int xfs_btree_get_level(struct xfs_btree_block *block) { return be16_to_cpu(block->bb_level); diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index d080a6833a8..457f88a76e1 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -45,7 +45,6 @@ STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_inobt_key_t *, xfs_btree_cur_t **, int *); @@ -337,7 +336,7 @@ xfs_inobt_delrec( */ if (be16_to_cpu(left->bb_numrecs) - 1 >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_inobt_rshift(tcur, level, &i))) + if ((error = xfs_btree_rshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -608,7 +607,7 @@ xfs_inobt_insrec( /* * First, try shifting an entry to the right neighbor. */ - if ((error = xfs_inobt_rshift(cur, level, &i))) + if ((error = xfs_btree_rshift(cur, level, &i))) return error; if (i) { /* nothing */ @@ -1116,136 +1115,6 @@ xfs_inobt_newroot( return 0; } -/* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_inobt_rshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop index */ - xfs_inobt_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left (current) block */ - xfs_inobt_block_t *left; /* left (current) btree block */ - xfs_inobt_key_t *lkp; /* key pointer for left block */ - xfs_inobt_ptr_t *lpp; /* address pointer for left block */ - xfs_inobt_rec_t *lrp; /* record pointer for left block */ - xfs_buf_t *rbp; /* buffer for right neighbor block */ - xfs_inobt_block_t *right; /* right neighbor btree block */ - xfs_inobt_key_t *rkp; /* key pointer for right block */ - xfs_inobt_ptr_t *rpp; /* address pointer for right block */ - xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */ - xfs_btree_cur_t *tcur; /* temporary cursor */ - - /* - * Set up variables for this block as "left". - */ - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * If we've got no right sibling then we can't shift an entry right. - */ - if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) { - *stat = 0; - return 0; - } - /* - * Set up the right neighbor as "right". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), - 0, &rbp, XFS_INO_BTREE_REF))) - return error; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - /* - * Make a hole at the start of the right neighbor block, then - * copy the last left block entry to the hole. - */ - if (level > 0) { - lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) - return error; -#endif - *rkp = *lkp; - *rpp = *lpp; - xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - } else { - lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - *rrp = *lrp; - xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.ir_startino = rrp->ir_startino; - rkp = &key; - } - /* - * Decrement and log left's numrecs, bump and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, -1); - xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, 1); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); - else - xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1); -#endif - xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Using a temporary cursor, update the parent key values of the - * block on the right. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - xfs_btree_lastrec(tcur, level); - if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; - } - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - *stat = 1; - return 0; -} - /* * Split cur/level block in half. * Return new block number and its first record (to be inserted into parent). -- cgit v1.2.3 From 687b890a184fef263ebb773926e1f4aa69240d01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:53 +1100 Subject: [XFS] implement generic xfs_btree_lshift Make the btree left shift code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32197a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 146 +----------------------------------- fs/xfs/xfs_bmap_btree.c | 138 +--------------------------------- fs/xfs/xfs_btree.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 1 + fs/xfs/xfs_ialloc_btree.c | 147 +----------------------------------- 5 files changed, 192 insertions(+), 425 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 31e42891fc9..974a412ebc8 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -47,7 +47,6 @@ STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_alloc_key_t *, xfs_btree_cur_t **, int *); @@ -326,7 +325,7 @@ xfs_alloc_delrec( */ if (be16_to_cpu(right->bb_numrecs) - 1 >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_alloc_lshift(tcur, level, &i))) + if ((error = xfs_btree_lshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -691,7 +690,7 @@ xfs_alloc_insrec( * Next, try shifting an entry to the left neighbor. */ else { - if ((error = xfs_alloc_lshift(cur, level, &i))) + if ((error = xfs_btree_lshift(cur, level, &i))) return error; if (i) optr = ptr = cur->bc_ptrs[level]; @@ -935,147 +934,6 @@ xfs_alloc_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_alloc_lshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ -#ifdef DEBUG - int i; /* loop index */ -#endif - xfs_alloc_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left neighbor block */ - xfs_alloc_block_t *left; /* left neighbor btree block */ - int nrec; /* new number of left block entries */ - xfs_buf_t *rbp; /* buffer for right (current) block */ - xfs_alloc_block_t *right; /* right (current) btree block */ - xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */ - xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */ - xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */ - - /* - * Set up variables for this block as "right". - */ - rbp = cur->bc_bufs[level]; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; -#endif - /* - * If we've got no left sibling then we can't shift an entry left. - */ - if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] <= 1) { - *stat = 0; - return 0; - } - /* - * Set up the left neighbor as "left". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), - 0, &lbp, XFS_ALLOC_BTREE_REF))) - return error; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - nrec = be16_to_cpu(left->bb_numrecs) + 1; - /* - * If non-leaf, copy a key and a ptr to the left block. - */ - if (level > 0) { - xfs_alloc_key_t *lkp; /* key pointer for left block */ - xfs_alloc_ptr_t *lpp; /* address pointer for left block */ - - lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - *lkp = *rkp; - xfs_alloc_log_keys(cur, lbp, nrec, nrec); - lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) - return error; -#endif - *lpp = *rpp; - xfs_alloc_log_ptrs(cur, lbp, nrec, nrec); - xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); - } - /* - * If leaf, copy a record to the left block. - */ - else { - xfs_alloc_rec_t *lrp; /* record pointer for left block */ - - lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - *lrp = *rrp; - xfs_alloc_log_recs(cur, lbp, nrec, nrec); - xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); - } - /* - * Bump and log left's numrecs, decrement and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, 1); - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, -1); - xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Slide the contents of right down one entry. - */ - if (level > 0) { -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]), - level))) - return error; - } -#endif - memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - } else { - memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - key.ar_startblock = rrp->ar_startblock; - key.ar_blockcount = rrp->ar_blockcount; - rkp = &key; - } - /* - * Update the parent key values of right. - */ - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) - return error; - /* - * Slide the cursor value left one. - */ - cur->bc_ptrs[level]--; - *stat = 1; - return 0; -} - /* * Allocate a new root block, fill it in. */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 9d18fa8aa1d..809bd6ee177 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -52,7 +52,6 @@ STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); @@ -270,7 +269,7 @@ xfs_bmbt_delrec( bno = be64_to_cpu(right->bb_leftsib); if (be16_to_cpu(right->bb_numrecs) - 1 >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_bmbt_lshift(tcur, level, &i))) { + if ((error = xfs_btree_lshift(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -544,7 +543,7 @@ xfs_bmbt_insrec( if (i) { /* nothing */ } else { - if ((error = xfs_bmbt_lshift(cur, level, &i))) { + if ((error = xfs_btree_lshift(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -810,139 +809,6 @@ xfs_bmbt_log_ptrs( XFS_BMBT_TRACE_CURSOR(cur, EXIT); } -/* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_bmbt_lshift( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - int error; /* error return value */ -#ifdef DEBUG - int i; /* loop counter */ -#endif - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp=NULL; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - int lrecs; /* left record count */ - xfs_bmbt_rec_t *lrp=NULL; /* left record pointer */ - xfs_mount_t *mp; /* file system mount point */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp=NULL; /* right btree key */ - xfs_bmbt_ptr_t *rpp=NULL; /* right address pointer */ - xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */ - int rrecs; /* right record count */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - if (level == cur->bc_nlevels - 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - rbp = cur->bc_bufs[level]; - right = XFS_BUF_TO_BMBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - if (cur->bc_ptrs[level] <= 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - mp = cur->bc_mp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0, - &lbp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - left = XFS_BUF_TO_BMBT_BLOCK(lbp); - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - lrecs = be16_to_cpu(left->bb_numrecs) + 1; - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - *lkp = *rkp; - xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs); - lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - *lpp = *rpp; - xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs); - } else { - lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - *lrp = *rrp; - xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs); - } - left->bb_numrecs = cpu_to_be16(lrecs); - xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp); - else - xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp); -#endif - rrecs = be16_to_cpu(right->bb_numrecs) - 1; - right->bb_numrecs = cpu_to_be16(rrecs); - xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS); - if (level > 0) { -#ifdef DEBUG - for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1], - level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memmove(rkp, rkp + 1, rrecs * sizeof(*rkp)); - memmove(rpp, rpp + 1, rrecs * sizeof(*rpp)); - xfs_bmbt_log_keys(cur, rbp, 1, rrecs); - xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs); - } else { - memmove(rrp, rrp + 1, rrecs * sizeof(*rrp)); - xfs_bmbt_log_recs(cur, rbp, 1, rrecs); - key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); - rkp = &key; - } - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_ptrs[level]--; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - /* * Determine the extent state. */ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e1a21378184..2b0d1422c4c 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1840,6 +1840,191 @@ error0: return error; } +/* + * Move 1 record left from cur/level if possible. + * Update cur to reflect the new path. + */ +int /* error */ +xfs_btree_lshift( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + union xfs_btree_key key; /* btree key */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + int lrecs; /* left record count */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + int rrecs; /* right record count */ + union xfs_btree_ptr lptr; /* left btree pointer */ + union xfs_btree_key *rkp = NULL; /* right btree key */ + union xfs_btree_ptr *rpp = NULL; /* right address pointer */ + union xfs_btree_rec *rrp = NULL; /* right record pointer */ + int error; /* error return value */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) + goto out0; + + /* Set up variables for this block as "right". */ + right = xfs_btree_get_block(cur, level, &rbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, right, level, rbp); + if (error) + goto error0; +#endif + + /* If we've got no left sibling then we can't shift an entry left. */ + xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + if (xfs_btree_ptr_is_null(cur, &lptr)) + goto out0; + + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + if (cur->bc_ptrs[level] <= 1) + goto out0; + + /* Set up the left neighbor as "left". */ + error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); + if (error) + goto error0; + + /* If it's full, it can't take another entry. */ + lrecs = xfs_btree_get_numrecs(left); + if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) + goto out0; + + rrecs = xfs_btree_get_numrecs(right); + + /* + * We add one entry to the left side and remove one for the right side. + * Accout for it here, the changes will be updated on disk and logged + * later. + */ + lrecs++; + rrecs--; + + XFS_BTREE_STATS_INC(cur, lshift); + XFS_BTREE_STATS_ADD(cur, moves, 1); + + /* + * If non-leaf, copy a key and a ptr to the left block. + * Log the changes to the left block. + */ + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + + lkp = xfs_btree_key_addr(cur, lrecs, left); + rkp = xfs_btree_key_addr(cur, 1, right); + + lpp = xfs_btree_ptr_addr(cur, lrecs, left); + rpp = xfs_btree_ptr_addr(cur, 1, right); +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, rpp, 0, level); + if (error) + goto error0; +#endif + xfs_btree_copy_keys(cur, lkp, rkp, 1); + xfs_btree_copy_ptrs(cur, lpp, rpp, 1); + + xfs_btree_log_keys(cur, lbp, lrecs, lrecs); + xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); + + xfs_btree_check_key(cur->bc_btnum, + xfs_btree_key_addr(cur, lrecs - 1, left), + lkp); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + + lrp = xfs_btree_rec_addr(cur, lrecs, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, lrp, rrp, 1); + xfs_btree_log_recs(cur, lbp, lrecs, lrecs); + + xfs_btree_check_rec(cur->bc_btnum, + xfs_btree_rec_addr(cur, lrecs - 1, left), + lrp); + } + + xfs_btree_set_numrecs(left, lrecs); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); + + xfs_btree_set_numrecs(right, rrecs); + xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); + + /* + * Slide the contents of right down one entry. + */ + XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); + if (level > 0) { + /* It's a nonleaf. operate on keys and ptrs */ +#ifdef DEBUG + int i; /* loop index */ + + for (i = 0; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, rpp, i + 1, level); + if (error) + goto error0; + } +#endif + xfs_btree_shift_keys(cur, + xfs_btree_key_addr(cur, 2, right), + -1, rrecs); + xfs_btree_shift_ptrs(cur, + xfs_btree_ptr_addr(cur, 2, right), + -1, rrecs); + + xfs_btree_log_keys(cur, rbp, 1, rrecs); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs); + } else { + /* It's a leaf. operate on records */ + xfs_btree_shift_recs(cur, + xfs_btree_rec_addr(cur, 2, right), + -1, rrecs); + xfs_btree_log_recs(cur, rbp, 1, rrecs); + + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + cur->bc_ops->init_key_from_rec(&key, + xfs_btree_rec_addr(cur, 1, right)); + rkp = &key; + } + + /* Update the parent key values of right. */ + error = xfs_btree_updkey(cur, rkp, level + 1); + if (error) + goto error0; + + /* Slide the cursor value left one. */ + cur->bc_ptrs[level]--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + /* * Move 1 record right from cur/level if possible. * Update cur to reflect the new path. diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 04311dbeff1..7cde287b5c9 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -533,6 +533,7 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); +int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 457f88a76e1..60f5db5d6df 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -43,7 +43,6 @@ STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_inobt_key_t *, xfs_btree_cur_t **, int *); @@ -276,7 +275,7 @@ xfs_inobt_delrec( */ if (be16_to_cpu(right->bb_numrecs) - 1 >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_inobt_lshift(tcur, level, &i))) + if ((error = xfs_btree_lshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -616,7 +615,7 @@ xfs_inobt_insrec( * Next, try shifting an entry to the left neighbor. */ else { - if ((error = xfs_inobt_lshift(cur, level, &i))) + if ((error = xfs_btree_lshift(cur, level, &i))) return error; if (i) { optr = ptr = cur->bc_ptrs[level]; @@ -826,148 +825,6 @@ xfs_inobt_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_inobt_lshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ -#ifdef DEBUG - int i; /* loop index */ -#endif - xfs_inobt_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left neighbor block */ - xfs_inobt_block_t *left; /* left neighbor btree block */ - xfs_inobt_key_t *lkp=NULL; /* key pointer for left block */ - xfs_inobt_ptr_t *lpp; /* address pointer for left block */ - xfs_inobt_rec_t *lrp=NULL; /* record pointer for left block */ - int nrec; /* new number of left block entries */ - xfs_buf_t *rbp; /* buffer for right (current) block */ - xfs_inobt_block_t *right; /* right (current) btree block */ - xfs_inobt_key_t *rkp=NULL; /* key pointer for right block */ - xfs_inobt_ptr_t *rpp=NULL; /* address pointer for right block */ - xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */ - - /* - * Set up variables for this block as "right". - */ - rbp = cur->bc_bufs[level]; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; -#endif - /* - * If we've got no left sibling then we can't shift an entry left. - */ - if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] <= 1) { - *stat = 0; - return 0; - } - /* - * Set up the left neighbor as "left". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), - 0, &lbp, XFS_INO_BTREE_REF))) - return error; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - nrec = be16_to_cpu(left->bb_numrecs) + 1; - /* - * If non-leaf, copy a key and a ptr to the left block. - */ - if (level > 0) { - lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - *lkp = *rkp; - xfs_inobt_log_keys(cur, lbp, nrec, nrec); - lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) - return error; -#endif - *lpp = *rpp; - xfs_inobt_log_ptrs(cur, lbp, nrec, nrec); - } - /* - * If leaf, copy a record to the left block. - */ - else { - lrp = XFS_INOBT_REC_ADDR(left, nrec, cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - *lrp = *rrp; - xfs_inobt_log_recs(cur, lbp, nrec, nrec); - } - /* - * Bump and log left's numrecs, decrement and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, 1); - xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); - else - xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); -#endif - be16_add_cpu(&right->bb_numrecs, -1); - xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Slide the contents of right down one entry. - */ - if (level > 0) { -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]), - level))) - return error; - } -#endif - memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - } else { - memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - key.ir_startino = rrp->ir_startino; - rkp = &key; - } - /* - * Update the parent key values of right. - */ - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) - return error; - /* - * Slide the cursor value left one. - */ - cur->bc_ptrs[level]--; - *stat = 1; - return 0; -} - /* * Allocate a new root block, fill it in. */ -- cgit v1.2.3 From f5eb8e7ca58bc1e92436614444006120d21668ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:03 +1100 Subject: [XFS] implement generic xfs_btree_split Make the btree split code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32198a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 200 +++++++-------------------------- fs/xfs/xfs_bmap_btree.c | 275 ++++++++++++++++------------------------------ fs/xfs/xfs_btree.c | 268 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 8 ++ fs/xfs/xfs_ialloc_btree.c | 212 +++++++++-------------------------- 5 files changed, 460 insertions(+), 503 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 974a412ebc8..8a8d1aeec52 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -35,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -48,8 +49,6 @@ STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, - xfs_alloc_key_t *, xfs_btree_cur_t **, int *); /* * Internal functions. @@ -695,15 +694,18 @@ xfs_alloc_insrec( if (i) optr = ptr = cur->bc_ptrs[level]; else { + union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; /* * Next, try splitting the current block in * half. If this works we have to re-set our * variables because we could be in a * different block now. */ - if ((error = xfs_alloc_split(cur, level, &nbno, - &nkey, &ncur, &i))) + if ((error = xfs_btree_split(cur, level, &bno, + (union xfs_btree_key *)&nkey, + &ncur, &i))) return error; + nbno = be32_to_cpu(bno.s); if (i) { bp = cur->bc_bufs[level]; block = XFS_BUF_TO_ALLOC_BLOCK(bp); @@ -1089,160 +1091,6 @@ xfs_alloc_newroot( return 0; } -/* - * Split cur/level block in half. - * Return new block number and its first record (to be inserted into parent). - */ -STATIC int /* error */ -xfs_alloc_split( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to split */ - xfs_agblock_t *bnop, /* output: block number allocated */ - xfs_alloc_key_t *keyp, /* output: first key of new block */ - xfs_btree_cur_t **curp, /* output: new cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop index/record number */ - xfs_agblock_t lbno; /* left (current) block number */ - xfs_buf_t *lbp; /* buffer for left block */ - xfs_alloc_block_t *left; /* left (current) btree block */ - xfs_agblock_t rbno; /* right (new) block number */ - xfs_buf_t *rbp; /* buffer for right block */ - xfs_alloc_block_t *right; /* right (new) btree block */ - - /* - * Allocate the new block from the freelist. - * If we can't do it, we're toast. Give up. - */ - error = xfs_alloc_get_freelist(cur->bc_tp, - cur->bc_private.a.agbp, &rbno, 1); - if (error) - return error; - if (rbno == NULLAGBLOCK) { - *stat = 0; - return 0; - } - xfs_trans_agbtree_delta(cur->bc_tp, 1); - rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno, - rbno, 0); - /* - * Set up the new block as "right". - */ - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - /* - * "Left" is the current (according to the cursor) block. - */ - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * Fill in the btree header for the new block. - */ - right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - right->bb_level = left->bb_level; - right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); - /* - * Make sure that if there's an odd number of entries now, that - * each new block will have the same number of entries. - */ - if ((be16_to_cpu(left->bb_numrecs) & 1) && - cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add_cpu(&right->bb_numrecs, 1); - i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; - /* - * For non-leaf blocks, copy keys and addresses over to the new block. - */ - if (level > 0) { - xfs_alloc_key_t *lkp; /* left btree key pointer */ - xfs_alloc_ptr_t *lpp; /* left btree address pointer */ - xfs_alloc_key_t *rkp; /* right btree key pointer */ - xfs_alloc_ptr_t *rpp; /* right btree address pointer */ - - lkp = XFS_ALLOC_KEY_ADDR(left, i, cur); - lpp = XFS_ALLOC_PTR_ADDR(left, i, cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) - return error; - } -#endif - memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *keyp = *rkp; - } - /* - * For leaf blocks, copy records over to the new block. - */ - else { - xfs_alloc_rec_t *lrp; /* left btree record pointer */ - xfs_alloc_rec_t *rrp; /* right btree record pointer */ - - lrp = XFS_ALLOC_REC_ADDR(left, i, cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->ar_startblock = rrp->ar_startblock; - keyp->ar_blockcount = rrp->ar_blockcount; - } - /* - * Find the left block number by looking in the buffer. - * Adjust numrecs, sibling pointers. - */ - lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp)); - be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); - right->bb_rightsib = left->bb_rightsib; - left->bb_rightsib = cpu_to_be32(rbno); - right->bb_leftsib = cpu_to_be32(lbno); - xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS); - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there's a block to the new block's right, make that block - * point back to right instead of to left. - */ - if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) { - xfs_alloc_block_t *rrblock; /* rr btree block */ - xfs_buf_t *rrbp; /* buffer for rrblock */ - - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0, - &rrbp, XFS_ALLOC_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(rbno); - xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * If the cursor is really in the right block, move it there. - * If it's just pointing past the last entry in left, then we'll - * insert there, so don't change anything in that case. - */ - if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); - } - /* - * If there are more levels, we'll need another cursor which refers to - * the right block, no matter where this cursor was. - */ - if (level + 1 < cur->bc_nlevels) { - if ((error = xfs_btree_dup_cursor(cur, curp))) - return error; - (*curp)->bc_ptrs[level + 1]++; - } - *bnop = rbno; - *stat = 1; - return 0; -} /* * Externally visible routines. @@ -1396,6 +1244,41 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +STATIC int +xfs_allocbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int length, + int *stat) +{ + int error; + xfs_agblock_t bno; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + /* Allocate the new block from the freelist. If we can't, give up. */ + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + &bno, 1); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + if (bno == NULLAGBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + xfs_trans_agbtree_delta(cur->bc_tp, 1); + new->s = cpu_to_be32(bno); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +} + /* * Update the longest extent in the AGF */ @@ -1557,6 +1440,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .key_len = sizeof(xfs_alloc_key_t), .dup_cursor = xfs_allocbt_dup_cursor, + .alloc_block = xfs_allocbt_alloc_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 809bd6ee177..e7539263457 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -52,8 +52,6 @@ STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, - __uint64_t *, xfs_btree_cur_t **, int *); #undef EXIT @@ -550,13 +548,17 @@ xfs_bmbt_insrec( if (i) { optr = ptr = cur->bc_ptrs[level]; } else { - if ((error = xfs_bmbt_split(cur, level, - &nbno, &startoff, &ncur, + union xfs_btree_ptr bno = { .l = cpu_to_be64(nbno) }; + union xfs_btree_key skey; + if ((error = xfs_btree_split(cur, level, + &bno, &skey, &ncur, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } + nbno = be64_to_cpu(bno.l); + startoff = be64_to_cpu(skey.bmbt.br_startoff); if (i) { block = xfs_bmbt_get_block( cur, level, &bp); @@ -825,184 +827,6 @@ xfs_extent_state( return XFS_EXT_NORM; } - -/* - * Split cur/level block in half. - * Return new block number and its first record (to be inserted into parent). - */ -STATIC int /* error */ -xfs_bmbt_split( - xfs_btree_cur_t *cur, - int level, - xfs_fsblock_t *bnop, - __uint64_t *startoff, - xfs_btree_cur_t **curp, - int *stat) /* success/failure */ -{ - xfs_alloc_arg_t args; /* block allocation args */ - int error; /* error return value */ - int i; /* loop counter */ - xfs_fsblock_t lbno; /* left sibling block number */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - xfs_bmbt_rec_t *lrp; /* left record pointer */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp; /* right btree key */ - xfs_bmbt_ptr_t *rpp; /* right address pointer */ - xfs_bmbt_block_t *rrblock; /* right-right btree block */ - xfs_buf_t *rrbp; /* right-right buffer pointer */ - xfs_bmbt_rec_t *rrp; /* right record pointer */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - // disable until merged into common code -// XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff); - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - lbp = cur->bc_bufs[level]; - lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); - left = XFS_BUF_TO_BMBT_BLOCK(lbp); - args.fsbno = cur->bc_private.b.firstblock; - args.firstblock = args.fsbno; - args.minleft = 0; - if (args.fsbno == NULLFSBLOCK) { - args.fsbno = lbno; - args.type = XFS_ALLOCTYPE_START_BNO; - /* - * Make sure there is sufficient room left in the AG to - * complete a full tree split for an extent insert. If - * we are converting the middle part of an extent then - * we may need space for two tree splits. - * - * We are relying on the caller to make the correct block - * reservation for this operation to succeed. If the - * reservation amount is insufficient then we may fail a - * block allocation here and corrupt the filesystem. - */ - args.minleft = xfs_trans_get_block_res(args.tp); - } else if (cur->bc_private.b.flist->xbf_low) - args.type = XFS_ALLOCTYPE_START_BNO; - else - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.alignment = args.total = args.isfl = - args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; - if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return XFS_ERROR(ENOSPC); - } - if ((error = xfs_alloc_vextent(&args))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (args.fsbno == NULLFSBLOCK && args.minleft) { - /* - * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if - * successful activate the lowspace algorithm. - */ - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; - if ((error = xfs_alloc_vextent(&args))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_private.b.flist->xbf_low = 1; - } - if (args.fsbno == NULLFSBLOCK) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, 1L); - rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0); - right = XFS_BUF_TO_BMBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); - right->bb_level = left->bb_level; - right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); - if ((be16_to_cpu(left->bb_numrecs) & 1) && - cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add_cpu(&right->bb_numrecs, 1); - i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, i, cur); - lpp = XFS_BMAP_PTR_IADDR(left, i, cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *startoff = be64_to_cpu(rkp->br_startoff); - } else { - lrp = XFS_BMAP_REC_IADDR(left, i, cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *startoff = xfs_bmbt_disk_get_startoff(rrp); - } - be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); - right->bb_rightsib = left->bb_rightsib; - left->bb_rightsib = cpu_to_be64(args.fsbno); - right->bb_leftsib = cpu_to_be64(lbno); - xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS); - xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) { - if ((error = xfs_btree_read_bufl(args.mp, args.tp, - be64_to_cpu(right->bb_rightsib), 0, &rrbp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp); - if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - rrblock->bb_leftsib = cpu_to_be64(args.fsbno); - xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB); - } - if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); - } - if (level + 1 < cur->bc_nlevels) { - if ((error = xfs_btree_dup_cursor(cur, curp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - (*curp)->bc_ptrs[level + 1]++; - } - *bnop = args.fsbno; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - /* * Convert on-disk form of btree root to in-memory form. */ @@ -1737,6 +1561,92 @@ xfs_bmbt_dup_cursor( return new; } +STATIC int +xfs_bmbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int length, + int *stat) +{ + xfs_alloc_arg_t args; /* block allocation args */ + int error; /* error return value */ + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.fsbno = cur->bc_private.b.firstblock; + args.firstblock = args.fsbno; + + if (args.fsbno == NULLFSBLOCK) { + args.fsbno = be64_to_cpu(start->l); + args.type = XFS_ALLOCTYPE_START_BNO; + /* + * Make sure there is sufficient room left in the AG to + * complete a full tree split for an extent insert. If + * we are converting the middle part of an extent then + * we may need space for two tree splits. + * + * We are relying on the caller to make the correct block + * reservation for this operation to succeed. If the + * reservation amount is insufficient then we may fail a + * block allocation here and corrupt the filesystem. + */ + args.minleft = xfs_trans_get_block_res(args.tp); + } else if (cur->bc_private.b.flist->xbf_low) { + args.type = XFS_ALLOCTYPE_START_BNO; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + } + + args.minlen = args.maxlen = args.prod = 1; + args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; + if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { + error = XFS_ERROR(ENOSPC); + goto error0; + } + error = xfs_alloc_vextent(&args); + if (error) + goto error0; + + if (args.fsbno == NULLFSBLOCK && args.minleft) { + /* + * Could not find an AG with enough free space to satisfy + * a full btree split. Try again without minleft and if + * successful activate the lowspace algorithm. + */ + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.minleft = 0; + error = xfs_alloc_vextent(&args); + if (error) + goto error0; + cur->bc_private.b.flist->xbf_low = 1; + } + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + cur->bc_private.b.firstblock = args.fsbno; + cur->bc_private.b.allocated++; + cur->bc_private.b.ip->i_d.di_nblocks++; + xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); + XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, + XFS_TRANS_DQ_BCOUNT, 1L); + + new->l = cpu_to_be64(args.fsbno); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + + error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + STATIC int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, @@ -1861,6 +1771,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .key_len = sizeof(xfs_bmbt_key_t), .dup_cursor = xfs_bmbt_dup_cursor, + .alloc_block = xfs_bmbt_alloc_block, .get_maxrecs = xfs_bmbt_get_maxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 2b0d1422c4c..80576695fbe 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -988,6 +988,48 @@ xfs_btree_get_sibling( } } +STATIC void +xfs_btree_set_sibling( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, + int lr) +{ + ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (lr == XFS_BB_RIGHTSIB) + block->bb_u.l.bb_rightsib = ptr->l; + else + block->bb_u.l.bb_leftsib = ptr->l; + } else { + if (lr == XFS_BB_RIGHTSIB) + block->bb_u.s.bb_rightsib = ptr->s; + else + block->bb_u.s.bb_leftsib = ptr->s; + } +} + +STATIC void +xfs_btree_init_block( + struct xfs_btree_cur *cur, + int level, + int numrecs, + struct xfs_btree_block *new) /* new block */ +{ + new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); + new->bb_level = cpu_to_be16(level); + new->bb_numrecs = cpu_to_be16(numrecs); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); + new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); + } else { + new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); + } +} + /* * Return true if ptr is the last record in the btree and * we need to track updateѕ to this record. The decision @@ -1012,6 +1054,21 @@ xfs_btree_is_lastrec( return 1; } +STATIC void +xfs_btree_buf_to_ptr( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, + XFS_BUF_ADDR(bp))); + else { + ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp, + XFS_BUF_ADDR(bp))); + } +} + STATIC xfs_daddr_t xfs_btree_ptr_to_daddr( struct xfs_btree_cur *cur, @@ -1051,6 +1108,31 @@ xfs_btree_set_refs( } } +STATIC int +xfs_btree_get_buf_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int flags, + struct xfs_btree_block **block, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_daddr_t d; + + /* need to sort out how callers deal with failures first */ + ASSERT(!(flags & XFS_BUF_TRYLOCK)); + + d = xfs_btree_ptr_to_daddr(cur, ptr); + *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, + mp->m_bsize, flags); + + ASSERT(*bpp); + ASSERT(!XFS_BUF_GETERROR(*bpp)); + + *block = XFS_BUF_TO_BLOCK(*bpp); + return 0; +} + /* * Read in the buffer at the given ptr and return the buffer and * the block pointer within the buffer. @@ -2199,3 +2281,189 @@ error1: xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } + +/* + * Split cur/level block in half. + * Return new block number and the key to its first + * record (to be inserted into parent). + */ +int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + union xfs_btree_ptr lptr; /* left sibling block ptr */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + union xfs_btree_ptr rptr; /* right sibling block ptr */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + union xfs_btree_ptr rrptr; /* right-right sibling ptr */ + struct xfs_buf *rrbp; /* right-right buffer pointer */ + struct xfs_btree_block *rrblock; /* right-right btree block */ + int lrecs; + int rrecs; + int src_index; + int error; /* error return value */ +#ifdef DEBUG + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); + + XFS_BTREE_STATS_INC(cur, split); + + /* Set up left block (current one). */ + left = xfs_btree_get_block(cur, level, &lbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + + xfs_btree_buf_to_ptr(cur, lbp, &lptr); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); + if (error) + goto error0; + if (*stat == 0) + goto out0; + XFS_BTREE_STATS_INC(cur, alloc); + + /* Set up the new block as "right". */ + error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); + if (error) + goto error0; + + /* Fill in the btree header for the new right block. */ + xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right); + + /* + * Split the entries between the old and the new block evenly. + * Make sure that if there's an odd number of entries now, that + * each new block will have the same number of entries. + */ + lrecs = xfs_btree_get_numrecs(left); + rrecs = lrecs / 2; + if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) + rrecs++; + src_index = (lrecs - rrecs + 1); + + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + + /* + * Copy btree block entries from the left block over to the + * new block, the right. Update the right block and log the + * changes. + */ + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + union xfs_btree_key *rkp; /* right btree key */ + union xfs_btree_ptr *rpp; /* right address pointer */ + + lkp = xfs_btree_key_addr(cur, src_index, left); + lpp = xfs_btree_ptr_addr(cur, src_index, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); + +#ifdef DEBUG + for (i = src_index; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, lpp, i, level); + if (error) + goto error0; + } +#endif + + xfs_btree_copy_keys(cur, rkp, lkp, rrecs); + xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); + + xfs_btree_log_keys(cur, rbp, 1, rrecs); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs); + + /* Grab the keys to the entries moved to the right block */ + xfs_btree_copy_keys(cur, key, rkp, 1); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + union xfs_btree_rec *rrp; /* right record pointer */ + + lrp = xfs_btree_rec_addr(cur, src_index, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, rrp, lrp, rrecs); + xfs_btree_log_recs(cur, rbp, 1, rrecs); + + cur->bc_ops->init_key_from_rec(key, + xfs_btree_rec_addr(cur, 1, right)); + } + + + /* + * Find the left block number by looking in the buffer. + * Adjust numrecs, sibling pointers. + */ + xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); + xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); + xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); + + lrecs -= rrecs; + xfs_btree_set_numrecs(left, lrecs); + xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); + + xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + + /* + * If there's a block to the new block's right, make that block + * point back to right instead of to left. + */ + if (!xfs_btree_ptr_is_null(cur, &rrptr)) { + error = xfs_btree_read_buf_block(cur, &rrptr, level, + 0, &rrblock, &rrbp); + if (error) + goto error0; + xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); + xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + /* + * If the cursor is really in the right block, move it there. + * If it's just pointing past the last entry in left, then we'll + * insert there, so don't change anything in that case. + */ + if (cur->bc_ptrs[level] > lrecs + 1) { + xfs_btree_setbuf(cur, level, rbp); + cur->bc_ptrs[level] -= lrecs; + } + /* + * If there are more levels, we'll need another cursor which refers + * the right block, no matter where this cursor was. + */ + if (level + 1 < cur->bc_nlevels) { + error = xfs_btree_dup_cursor(cur, curp); + if (error) + goto error0; + (*curp)->bc_ptrs[level + 1]++; + } + *ptrp = rptr; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7cde287b5c9..354a6656fad 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -187,6 +187,12 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* block allocation / freeing */ + int (*alloc_block)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *start_bno, + union xfs_btree_ptr *new_bno, + int length, int *stat); + /* update last record information */ void (*update_lastrec)(struct xfs_btree_cur *cur, struct xfs_btree_block *block, @@ -535,6 +541,8 @@ int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); +int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, + union xfs_btree_key *, struct xfs_btree_cur **, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 60f5db5d6df..c76190a83e4 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -35,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -44,8 +45,6 @@ STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, - xfs_inobt_key_t *, xfs_btree_cur_t **, int *); /* * Single level of the xfs_inobt_delete record deletion routine. @@ -620,15 +619,18 @@ xfs_inobt_insrec( if (i) { optr = ptr = cur->bc_ptrs[level]; } else { + union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; /* * Next, try splitting the current block * in half. If this works we have to * re-set our variables because * we could be in a different block now. */ - if ((error = xfs_inobt_split(cur, level, &nbno, - &nkey, &ncur, &i))) + if ((error = xfs_btree_split(cur, level, &bno, + (union xfs_btree_key *)&nkey, + &ncur, &i))) return error; + nbno = be32_to_cpu(bno.s); if (i) { bp = cur->bc_bufs[level]; block = XFS_BUF_TO_INOBT_BLOCK(bp); @@ -972,165 +974,6 @@ xfs_inobt_newroot( return 0; } -/* - * Split cur/level block in half. - * Return new block number and its first record (to be inserted into parent). - */ -STATIC int /* error */ -xfs_inobt_split( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to split */ - xfs_agblock_t *bnop, /* output: block number allocated */ - xfs_inobt_key_t *keyp, /* output: first key of new block */ - xfs_btree_cur_t **curp, /* output: new cursor */ - int *stat) /* success/failure */ -{ - xfs_alloc_arg_t args; /* allocation argument structure */ - int error; /* error return value */ - int i; /* loop index/record number */ - xfs_agblock_t lbno; /* left (current) block number */ - xfs_buf_t *lbp; /* buffer for left block */ - xfs_inobt_block_t *left; /* left (current) btree block */ - xfs_inobt_key_t *lkp; /* left btree key pointer */ - xfs_inobt_ptr_t *lpp; /* left btree address pointer */ - xfs_inobt_rec_t *lrp; /* left btree record pointer */ - xfs_buf_t *rbp; /* buffer for right block */ - xfs_inobt_block_t *right; /* right (new) btree block */ - xfs_inobt_key_t *rkp; /* right btree key pointer */ - xfs_inobt_ptr_t *rpp; /* right btree address pointer */ - xfs_inobt_rec_t *rrp; /* right btree record pointer */ - - /* - * Set up left block (current one). - */ - lbp = cur->bc_bufs[level]; - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp)); - /* - * Allocate the new block. - * If we can't do it, we're toast. Give up. - */ - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno); - args.mod = args.minleft = args.alignment = args.total = args.wasdel = - args.isfl = args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - if ((error = xfs_alloc_vextent(&args))) - return error; - if (args.fsbno == NULLFSBLOCK) { - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0); - /* - * Set up the new block as "right". - */ - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - /* - * "Left" is the current (according to the cursor) block. - */ - left = XFS_BUF_TO_INOBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * Fill in the btree header for the new block. - */ - right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - right->bb_level = left->bb_level; - right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); - /* - * Make sure that if there's an odd number of entries now, that - * each new block will have the same number of entries. - */ - if ((be16_to_cpu(left->bb_numrecs) & 1) && - cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add_cpu(&right->bb_numrecs, 1); - i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; - /* - * For non-leaf blocks, copy keys and addresses over to the new block. - */ - if (level > 0) { - lkp = XFS_INOBT_KEY_ADDR(left, i, cur); - lpp = XFS_INOBT_PTR_ADDR(left, i, cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) - return error; - } -#endif - memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *keyp = *rkp; - } - /* - * For leaf blocks, copy records over to the new block. - */ - else { - lrp = XFS_INOBT_REC_ADDR(left, i, cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->ir_startino = rrp->ir_startino; - } - /* - * Find the left block number by looking in the buffer. - * Adjust numrecs, sibling pointers. - */ - be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); - right->bb_rightsib = left->bb_rightsib; - left->bb_rightsib = cpu_to_be32(args.agbno); - right->bb_leftsib = cpu_to_be32(lbno); - xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS); - xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there's a block to the new block's right, make that block - * point back to right instead of to left. - */ - if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) { - xfs_inobt_block_t *rrblock; /* rr btree block */ - xfs_buf_t *rrbp; /* buffer for rrblock */ - - if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno, - be32_to_cpu(right->bb_rightsib), 0, &rrbp, - XFS_INO_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(args.agbno); - xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * If the cursor is really in the right block, move it there. - * If it's just pointing past the last entry in left, then we'll - * insert there, so don't change anything in that case. - */ - if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); - } - /* - * If there are more levels, we'll need another cursor which refers - * the right block, no matter where this cursor was. - */ - if (level + 1 < cur->bc_nlevels) { - if ((error = xfs_btree_dup_cursor(cur, curp))) - return error; - (*curp)->bc_ptrs[level + 1]++; - } - *bnop = args.agbno; - *stat = 1; - return 0; -} - /* * Externally visible routines. */ @@ -1285,6 +1128,48 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int length, + int *stat) +{ + xfs_alloc_arg_t args; /* block allocation args */ + int error; /* error return value */ + xfs_agblock_t sbno = be32_to_cpu(start->s); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); + args.minlen = 1; + args.maxlen = 1; + args.prod = 1; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + + error = xfs_alloc_vextent(&args); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + + new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); + *stat = 1; + return 0; +} + + STATIC int xfs_inobt_get_maxrecs( struct xfs_btree_cur *cur, @@ -1396,6 +1281,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .key_len = sizeof(xfs_inobt_key_t), .dup_cursor = xfs_inobt_dup_cursor, + .alloc_block = xfs_inobt_alloc_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, -- cgit v1.2.3 From 344207ce8474b79be331eb93e6df4cb5bdd48ab2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:16 +1100 Subject: [XFS] implement semi-generic xfs_btree_new_root From: Dave Chinner Add a xfs_btree_new_root helper for the alloc and ialloc btrees. The bmap btree needs it's own version and is not converted. [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32200a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 179 ++++++---------------------------------------- fs/xfs/xfs_btree.c | 129 +++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 5 ++ fs/xfs/xfs_ialloc_btree.c | 164 +++++------------------------------------- 4 files changed, 172 insertions(+), 305 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 8a8d1aeec52..f21a3e9cc3d 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -48,7 +48,6 @@ STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); /* * Internal functions. @@ -628,7 +627,7 @@ xfs_alloc_insrec( */ if (level >= cur->bc_nlevels) { XFS_STATS_INC(xs_abt_insrec); - if ((error = xfs_alloc_newroot(cur, &i))) + if ((error = xfs_btree_new_root(cur, &i))) return error; *bnop = NULLAGBLOCK; *stat = i; @@ -936,161 +935,6 @@ xfs_alloc_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Allocate a new root block, fill it in. - */ -STATIC int /* error */ -xfs_alloc_newroot( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - xfs_agblock_t lbno; /* left block number */ - xfs_buf_t *lbp; /* left btree buffer */ - xfs_alloc_block_t *left; /* left btree block */ - xfs_mount_t *mp; /* mount structure */ - xfs_agblock_t nbno; /* new block number */ - xfs_buf_t *nbp; /* new (root) buffer */ - xfs_alloc_block_t *new; /* new (root) btree block */ - int nptr; /* new value for key index, 1 or 2 */ - xfs_agblock_t rbno; /* right block number */ - xfs_buf_t *rbp; /* right btree buffer */ - xfs_alloc_block_t *right; /* right btree block */ - - mp = cur->bc_mp; - - ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp)); - /* - * Get a buffer from the freelist blocks, for the new root. - */ - error = xfs_alloc_get_freelist(cur->bc_tp, - cur->bc_private.a.agbp, &nbno, 1); - if (error) - return error; - /* - * None available, we fail. - */ - if (nbno == NULLAGBLOCK) { - *stat = 0; - return 0; - } - xfs_trans_agbtree_delta(cur->bc_tp, 1); - nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno, - 0); - new = XFS_BUF_TO_ALLOC_BLOCK(nbp); - /* - * Set the root data in the a.g. freespace structure. - */ - { - xfs_agf_t *agf; /* a.g. freespace header */ - xfs_agnumber_t seqno; - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno); - be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1); - seqno = be32_to_cpu(agf->agf_seqno); - mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++; - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_ROOTS | XFS_AGF_LEVELS); - } - /* - * At the previous root level there are now two blocks: the old - * root, and the new block generated when it was split. - * We don't know which one the cursor is pointing at, so we - * set up variables "left" and "right" for each case. - */ - lbp = cur->bc_bufs[cur->bc_nlevels - 1]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))) - return error; -#endif - if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) { - /* - * Our block is left, pick up the right block. - */ - lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp)); - rbno = be32_to_cpu(left->bb_rightsib); - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, rbno, 0, &rbp, - XFS_ALLOC_BTREE_REF))) - return error; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, - cur->bc_nlevels - 1, rbp))) - return error; - nptr = 1; - } else { - /* - * Our block is right, pick up the left block. - */ - rbp = lbp; - right = left; - rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp)); - lbno = be32_to_cpu(right->bb_leftsib); - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, lbno, 0, &lbp, - XFS_ALLOC_BTREE_REF))) - return error; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, - cur->bc_nlevels - 1, lbp))) - return error; - nptr = 2; - } - /* - * Fill in the new block's btree header and log it. - */ - new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - new->bb_level = cpu_to_be16(cur->bc_nlevels); - new->bb_numrecs = cpu_to_be16(2); - new->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - new->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS); - ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK); - /* - * Fill in the key data in the new root. - */ - { - xfs_alloc_key_t *kp; /* btree key pointer */ - - kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); - if (be16_to_cpu(left->bb_level) > 0) { - kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); - kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur); - } else { - xfs_alloc_rec_t *rp; /* btree record pointer */ - - rp = XFS_ALLOC_REC_ADDR(left, 1, cur); - kp[0].ar_startblock = rp->ar_startblock; - kp[0].ar_blockcount = rp->ar_blockcount; - rp = XFS_ALLOC_REC_ADDR(right, 1, cur); - kp[1].ar_startblock = rp->ar_startblock; - kp[1].ar_blockcount = rp->ar_blockcount; - } - } - xfs_alloc_log_keys(cur, nbp, 1, 2); - /* - * Fill in the pointer data in the new root. - */ - { - xfs_alloc_ptr_t *pp; /* btree address pointer */ - - pp = XFS_ALLOC_PTR_ADDR(new, 1, cur); - pp[0] = cpu_to_be32(lbno); - pp[1] = cpu_to_be32(rbno); - } - xfs_alloc_log_ptrs(cur, nbp, 1, 2); - /* - * Fix up the cursor. - */ - xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; - cur->bc_nlevels++; - *stat = 1; - return 0; -} - /* * Externally visible routines. @@ -1244,6 +1088,26 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +STATIC void +xfs_allocbt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int inc) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + int btnum = cur->bc_btnum; + + ASSERT(ptr->s != 0); + + agf->agf_roots[btnum] = ptr->s; + be32_add_cpu(&agf->agf_levels[btnum], inc); + cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc; + + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); +} + STATIC int xfs_allocbt_alloc_block( struct xfs_btree_cur *cur, @@ -1440,6 +1304,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .key_len = sizeof(xfs_alloc_key_t), .dup_cursor = xfs_allocbt_dup_cursor, + .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 80576695fbe..8de884c4dab 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -2467,3 +2467,132 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Allocate a new root block, fill it in. + */ +int /* error */ +xfs_btree_new_root( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* one half of the old root block */ + struct xfs_buf *bp; /* buffer containing block */ + int error; /* error return value */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + struct xfs_buf *nbp; /* new (root) buffer */ + struct xfs_btree_block *new; /* new (root) btree block */ + int nptr; /* new value for key index, 1 or 2 */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + union xfs_btree_ptr rptr; + union xfs_btree_ptr lptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, newroot); + + /* initialise our start point from the cursor */ + cur->bc_ops->init_ptr_from_cur(cur, &rptr); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); + if (error) + goto error0; + if (*stat == 0) + goto out0; + XFS_BTREE_STATS_INC(cur, alloc); + + /* Set up the new block. */ + error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); + if (error) + goto error0; + + /* Set the root in the holding structure increasing the level by 1. */ + cur->bc_ops->set_root(cur, &lptr, 1); + + /* + * At the previous root level there are now two blocks: the old root, + * and the new block generated when it was split. We don't know which + * one the cursor is pointing at, so we set up variables "left" and + * "right" for each case. + */ + block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); + if (error) + goto error0; +#endif + + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &rptr)) { + /* Our block is left, pick up the right block. */ + lbp = bp; + xfs_btree_buf_to_ptr(cur, lbp, &lptr); + left = block; + error = xfs_btree_read_buf_block(cur, &rptr, + cur->bc_nlevels - 1, 0, &right, &rbp); + if (error) + goto error0; + bp = rbp; + nptr = 1; + } else { + /* Our block is right, pick up the left block. */ + rbp = bp; + xfs_btree_buf_to_ptr(cur, rbp, &rptr); + right = block; + xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + error = xfs_btree_read_buf_block(cur, &lptr, + cur->bc_nlevels - 1, 0, &left, &lbp); + if (error) + goto error0; + bp = lbp; + nptr = 2; + } + /* Fill in the new block's btree header and log it. */ + xfs_btree_init_block(cur, cur->bc_nlevels, 2, new); + xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); + ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && + !xfs_btree_ptr_is_null(cur, &rptr)); + + /* Fill in the key data in the new root. */ + if (xfs_btree_get_level(left) > 0) { + xfs_btree_copy_keys(cur, + xfs_btree_key_addr(cur, 1, new), + xfs_btree_key_addr(cur, 1, left), 1); + xfs_btree_copy_keys(cur, + xfs_btree_key_addr(cur, 2, new), + xfs_btree_key_addr(cur, 1, right), 1); + } else { + cur->bc_ops->init_key_from_rec( + xfs_btree_key_addr(cur, 1, new), + xfs_btree_rec_addr(cur, 1, left)); + cur->bc_ops->init_key_from_rec( + xfs_btree_key_addr(cur, 2, new), + xfs_btree_rec_addr(cur, 1, right)); + } + xfs_btree_log_keys(cur, nbp, 1, 2); + + /* Fill in the pointer data in the new root. */ + xfs_btree_copy_ptrs(cur, + xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); + xfs_btree_copy_ptrs(cur, + xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); + xfs_btree_log_ptrs(cur, nbp, 1, 2); + + /* Fix up the cursor. */ + xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); + cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_nlevels++; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 354a6656fad..18015392feb 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -187,6 +187,10 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* update btree root pointer */ + void (*set_root)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, int level_change); + /* block allocation / freeing */ int (*alloc_block)(struct xfs_btree_cur *cur, union xfs_btree_ptr *start_bno, @@ -543,6 +547,7 @@ int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); +int xfs_btree_new_root(struct xfs_btree_cur *, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c76190a83e4..7ba3c7bb398 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -44,7 +44,6 @@ STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); /* * Single level of the xfs_inobt_delete record deletion routine. @@ -556,7 +555,7 @@ xfs_inobt_insrec( * and we're done. */ if (level >= cur->bc_nlevels) { - error = xfs_inobt_newroot(cur, &i); + error = xfs_btree_new_root(cur, &i); *bnop = NULLAGBLOCK; *stat = i; return error; @@ -827,152 +826,6 @@ xfs_inobt_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Allocate a new root block, fill it in. - */ -STATIC int /* error */ -xfs_inobt_newroot( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - xfs_agi_t *agi; /* a.g. inode header */ - xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_inobt_block_t *block; /* one half of the old root block */ - xfs_buf_t *bp; /* buffer containing block */ - int error; /* error return value */ - xfs_inobt_key_t *kp; /* btree key pointer */ - xfs_agblock_t lbno; /* left block number */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_inobt_block_t *left; /* left btree block */ - xfs_buf_t *nbp; /* new (root) buffer */ - xfs_inobt_block_t *new; /* new (root) btree block */ - int nptr; /* new value for key index, 1 or 2 */ - xfs_inobt_ptr_t *pp; /* btree address pointer */ - xfs_agblock_t rbno; /* right block number */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_inobt_block_t *right; /* right btree block */ - xfs_inobt_rec_t *rp; /* btree record pointer */ - - ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp)); - - /* - * Get a block & a buffer. - */ - agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, - be32_to_cpu(agi->agi_root)); - args.mod = args.minleft = args.alignment = args.total = args.wasdel = - args.isfl = args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - if ((error = xfs_alloc_vextent(&args))) - return error; - /* - * None available, we fail. - */ - if (args.fsbno == NULLFSBLOCK) { - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0); - new = XFS_BUF_TO_INOBT_BLOCK(nbp); - /* - * Set the root data in the a.g. inode structure. - */ - agi->agi_root = cpu_to_be32(args.agbno); - be32_add_cpu(&agi->agi_level, 1); - xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp, - XFS_AGI_ROOT | XFS_AGI_LEVEL); - /* - * At the previous root level there are now two blocks: the old - * root, and the new block generated when it was split. - * We don't know which one the cursor is pointing at, so we - * set up variables "left" and "right" for each case. - */ - bp = cur->bc_bufs[cur->bc_nlevels - 1]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp))) - return error; -#endif - if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) { - /* - * Our block is left, pick up the right block. - */ - lbp = bp; - lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp)); - left = block; - rbno = be32_to_cpu(left->bb_rightsib); - if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno, - rbno, 0, &rbp, XFS_INO_BTREE_REF))) - return error; - bp = rbp; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, - cur->bc_nlevels - 1, rbp))) - return error; - nptr = 1; - } else { - /* - * Our block is right, pick up the left block. - */ - rbp = bp; - rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp)); - right = block; - lbno = be32_to_cpu(right->bb_leftsib); - if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno, - lbno, 0, &lbp, XFS_INO_BTREE_REF))) - return error; - bp = lbp; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, - cur->bc_nlevels - 1, lbp))) - return error; - nptr = 2; - } - /* - * Fill in the new block's btree header and log it. - */ - new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - new->bb_level = cpu_to_be16(cur->bc_nlevels); - new->bb_numrecs = cpu_to_be16(2); - new->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - new->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS); - ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK); - /* - * Fill in the key data in the new root. - */ - kp = XFS_INOBT_KEY_ADDR(new, 1, cur); - if (be16_to_cpu(left->bb_level) > 0) { - kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); - kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); - } else { - rp = XFS_INOBT_REC_ADDR(left, 1, cur); - kp[0].ir_startino = rp->ir_startino; - rp = XFS_INOBT_REC_ADDR(right, 1, cur); - kp[1].ir_startino = rp->ir_startino; - } - xfs_inobt_log_keys(cur, nbp, 1, 2); - /* - * Fill in the pointer data in the new root. - */ - pp = XFS_INOBT_PTR_ADDR(new, 1, cur); - pp[0] = cpu_to_be32(lbno); - pp[1] = cpu_to_be32(rbno); - xfs_inobt_log_ptrs(cur, nbp, 1, 2); - /* - * Fix up the cursor. - */ - xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; - cur->bc_nlevels++; - *stat = 1; - return 0; -} /* * Externally visible routines. @@ -1128,6 +981,20 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +STATIC void +xfs_inobt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, + int inc) /* level change */ +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + + agi->agi_root = nptr->s; + be32_add_cpu(&agi->agi_level, inc); + xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); +} + STATIC int xfs_inobt_alloc_block( struct xfs_btree_cur *cur, @@ -1281,6 +1148,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .key_len = sizeof(xfs_inobt_key_t), .dup_cursor = xfs_inobt_dup_cursor, + .set_root = xfs_inobt_set_root, .alloc_block = xfs_inobt_alloc_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, -- cgit v1.2.3 From ea77b0a66e6c910ef265d9af522d6303ea6b3055 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:28 +1100 Subject: [XFS] move xfs_bmbt_newroot to common code xfs_bmbt_newroot is a mostly generic implementation of moving from an inode root to a real block based root. So move it to xfs_btree.c where it can use all the nice infrastructure there and make it pointer size agnostic The new name for it is xfs_btree_new_iroot, following the old naming but making it clear we're dealing with the root in inode case here, and to avoid confusion with xfs_btree_new_root which is used for the not inode rooted case. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32201a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_bmap.c | 2 +- fs/xfs/xfs_bmap_btree.c | 113 +----------------------------------------------- fs/xfs/xfs_bmap_btree.h | 6 --- fs/xfs/xfs_btree.c | 101 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 1 + 5 files changed, 104 insertions(+), 119 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7d6c4ace805..315bc291268 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -476,7 +476,7 @@ xfs_bmap_add_attrfork_btree( goto error0; /* must be at least one entry */ XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); - if ((error = xfs_bmbt_newroot(cur, flags, &stat))) + if ((error = xfs_btree_new_iroot(cur, flags, &stat))) goto error0; if (stat == 0) { xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index e7539263457..204f276aeaa 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -525,7 +525,7 @@ xfs_bmbt_insrec( cur->bc_private.b.whichfork); block = xfs_bmbt_get_block(cur, level, &bp); } else if (level == cur->bc_nlevels - 1) { - if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) || + if ((error = xfs_btree_new_iroot(cur, &logflags, stat)) || *stat == 0) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; @@ -1182,117 +1182,6 @@ xfs_bmbt_log_recs( XFS_BMBT_TRACE_CURSOR(cur, EXIT); } -/* - * Give the bmap btree a new root block. Copy the old broot contents - * down into a real block and make the broot point to it. - */ -int /* error */ -xfs_bmbt_newroot( - xfs_btree_cur_t *cur, /* btree cursor */ - int *logflags, /* logging flags for inode */ - int *stat) /* return status - 0 fail */ -{ - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_bmbt_block_t *block; /* bmap btree block */ - xfs_buf_t *bp; /* buffer for block */ - xfs_bmbt_block_t *cblock; /* child btree block */ - xfs_bmbt_key_t *ckp; /* child key pointer */ - xfs_bmbt_ptr_t *cpp; /* child ptr pointer */ - int error; /* error return code */ -#ifdef DEBUG - int i; /* loop counter */ -#endif - xfs_bmbt_key_t *kp; /* pointer to bmap btree key */ - int level; /* btree level */ - xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - level = cur->bc_nlevels - 1; - block = xfs_bmbt_get_block(cur, level, &bp); - /* - * Copy the root into a real block. - */ - args.mp = cur->bc_mp; - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); - args.tp = cur->bc_tp; - args.fsbno = cur->bc_private.b.firstblock; - args.mod = args.minleft = args.alignment = args.total = args.isfl = - args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; - args.firstblock = args.fsbno; - if (args.fsbno == NULLFSBLOCK) { -#ifdef DEBUG - if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - args.fsbno = be64_to_cpu(*pp); - args.type = XFS_ALLOCTYPE_START_BNO; - } else if (cur->bc_private.b.flist->xbf_low) - args.type = XFS_ALLOCTYPE_START_BNO; - else - args.type = XFS_ALLOCTYPE_NEAR_BNO; - if ((error = xfs_alloc_vextent(&args))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (args.fsbno == NULLFSBLOCK) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, 1L); - bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0); - cblock = XFS_BUF_TO_BMBT_BLOCK(bp); - *cblock = *block; - be16_add_cpu(&block->bb_level, 1); - block->bb_numrecs = cpu_to_be16(1); - cur->bc_nlevels++; - cur->bc_ptrs[level + 1] = 1; - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur); - memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp)); - cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp)); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - *pp = cpu_to_be64(args.fsbno); - xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs), - cur->bc_private.b.whichfork); - xfs_btree_setbuf(cur, level, bp); - /* - * Do all this logging at the end so that - * the root is at the right level. - */ - xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS); - xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs)); - xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs)); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *logflags |= - XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork); - *stat = 1; - return 0; -} - /* * Set all the fields in a bmap extent record from the arguments. */ diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6bfd62ec54f..26fd8ace3e7 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -255,12 +255,6 @@ extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); -/* - * Give the bmap btree a new root block. Copy the old broot contents - * down into a real block and make the broot point to it. - */ -extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat); - extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 8de884c4dab..3b6e01dea66 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -2468,6 +2468,107 @@ error0: return error; } +/* + * Copy the old inode root contents into a real block and make the + * broot point to it. + */ +int /* error */ +xfs_btree_new_iroot( + struct xfs_btree_cur *cur, /* btree cursor */ + int *logflags, /* logging flags for inode */ + int *stat) /* return status - 0 fail */ +{ + struct xfs_buf *cbp; /* buffer for cblock */ + struct xfs_btree_block *block; /* btree block */ + struct xfs_btree_block *cblock; /* child btree block */ + union xfs_btree_key *ckp; /* child key pointer */ + union xfs_btree_ptr *cpp; /* child ptr pointer */ + union xfs_btree_key *kp; /* pointer to btree key */ + union xfs_btree_ptr *pp; /* pointer to block addr */ + union xfs_btree_ptr nptr; /* new block addr */ + int level; /* btree level */ + int error; /* error return code */ +#ifdef DEBUG + int i; /* loop counter */ +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, newroot); + + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + + level = cur->bc_nlevels - 1; + + block = xfs_btree_get_iroot(cur); + pp = xfs_btree_ptr_addr(cur, 1, block); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); + if (error) + goto error0; + if (*stat == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + } + XFS_BTREE_STATS_INC(cur, alloc); + + /* Copy the root into a real block. */ + error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); + if (error) + goto error0; + + memcpy(cblock, block, xfs_btree_block_len(cur)); + + be16_add_cpu(&block->bb_level, 1); + xfs_btree_set_numrecs(block, 1); + cur->bc_nlevels++; + cur->bc_ptrs[level + 1] = 1; + + kp = xfs_btree_key_addr(cur, 1, block); + ckp = xfs_btree_key_addr(cur, 1, cblock); + xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); + + cpp = xfs_btree_ptr_addr(cur, 1, cblock); +#ifdef DEBUG + for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { + error = xfs_btree_check_ptr(cur, pp, i, level); + if (error) + goto error0; + } +#endif + xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, &nptr, 0, level); + if (error) + goto error0; +#endif + xfs_btree_copy_ptrs(cur, pp, &nptr, 1); + + xfs_iroot_realloc(cur->bc_private.b.ip, + 1 - xfs_btree_get_numrecs(cblock), + cur->bc_private.b.whichfork); + + xfs_btree_setbuf(cur, level, cbp); + + /* + * Do all this logging at the end so that + * the root is at the right level. + */ + xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); + xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); + xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); + + *logflags |= + XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork); + *stat = 1; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + /* * Allocate a new root block, fill it in. */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 18015392feb..21eec863f00 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -548,6 +548,7 @@ int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); int xfs_btree_new_root(struct xfs_btree_cur *, int *); +int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); /* * Helpers. -- cgit v1.2.3 From 4b22a57188d87e873346b73c227607715be96399 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:40 +1100 Subject: [XFS] implement generic xfs_btree_insert/insrec Make the btree insert code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32202a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 10 +- fs/xfs/xfs_alloc_btree.c | 339 ++++--------------------------------------- fs/xfs/xfs_alloc_btree.h | 6 - fs/xfs/xfs_bmap.c | 20 +-- fs/xfs/xfs_bmap_btree.c | 308 +++++++-------------------------------- fs/xfs/xfs_bmap_btree.h | 1 - fs/xfs/xfs_btree.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 11 ++ fs/xfs/xfs_ialloc.c | 2 +- fs/xfs/xfs_ialloc_btree.c | 302 +++----------------------------------- fs/xfs/xfs_ialloc_btree.h | 6 - 11 files changed, 494 insertions(+), 873 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 875e1bae194..a983824c12b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -408,7 +408,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -416,7 +416,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -444,7 +444,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(bno_cur, &i))) + if ((error = xfs_btree_insert(bno_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -1756,7 +1756,7 @@ xfs_free_ag_extent( else { nbno = bno; nlen = len; - if ((error = xfs_alloc_insert(bno_cur, &i))) + if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -1768,7 +1768,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 0, error0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f21a3e9cc3d..818adca77fc 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -582,256 +582,6 @@ error0: return error; } -/* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. - */ -STATIC int /* error */ -xfs_alloc_insrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to insert record at */ - xfs_agblock_t *bnop, /* i/o: block number inserted */ - xfs_alloc_rec_t *recp, /* i/o: record data inserted */ - xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ - int *stat) /* output: success/failure */ -{ - xfs_agf_t *agf; /* allocation group freelist header */ - xfs_alloc_block_t *block; /* btree block record/key lives in */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_alloc_key_t key; /* key value being inserted */ - xfs_alloc_key_t *kp; /* pointer to btree keys */ - xfs_agblock_t nbno; /* block number of allocated block */ - xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ - xfs_alloc_key_t nkey; /* new key value, from split */ - xfs_alloc_rec_t nrec; /* new record value, for caller */ - int numrecs; - int optr; /* old ptr value */ - xfs_alloc_ptr_t *pp; /* pointer to btree addresses */ - int ptr; /* index in btree block for this rec */ - xfs_alloc_rec_t *rp; /* pointer to btree records */ - - ASSERT(be32_to_cpu(recp->ar_blockcount) > 0); - - /* - * GCC doesn't understand the (arguably complex) control flow in - * this function and complains about uninitialized structure fields - * without this. - */ - memset(&nrec, 0, sizeof(nrec)); - - /* - * If we made it to the root level, allocate a new root block - * and we're done. - */ - if (level >= cur->bc_nlevels) { - XFS_STATS_INC(xs_abt_insrec); - if ((error = xfs_btree_new_root(cur, &i))) - return error; - *bnop = NULLAGBLOCK; - *stat = i; - return 0; - } - /* - * Make a key out of the record data to be inserted, and save it. - */ - key.ar_startblock = recp->ar_startblock; - key.ar_blockcount = recp->ar_blockcount; - optr = ptr = cur->bc_ptrs[level]; - /* - * If we're off the left edge, return failure. - */ - if (ptr == 0) { - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_abt_insrec); - /* - * Get pointers to the btree buffer and block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; - /* - * Check that the new entry is being inserted in the right place. - */ - if (ptr <= numrecs) { - if (level == 0) { - rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); - xfs_btree_check_rec(cur->bc_btnum, recp, rp); - } else { - kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); - xfs_btree_check_key(cur->bc_btnum, &key, kp); - } - } -#endif - nbno = NULLAGBLOCK; - ncur = NULL; - /* - * If the block is full, we can't insert the new entry until we - * make the block un-full. - */ - if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - /* - * First, try shifting an entry to the right neighbor. - */ - if ((error = xfs_btree_rshift(cur, level, &i))) - return error; - if (i) { - /* nothing */ - } - /* - * Next, try shifting an entry to the left neighbor. - */ - else { - if ((error = xfs_btree_lshift(cur, level, &i))) - return error; - if (i) - optr = ptr = cur->bc_ptrs[level]; - else { - union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; - /* - * Next, try splitting the current block in - * half. If this works we have to re-set our - * variables because we could be in a - * different block now. - */ - if ((error = xfs_btree_split(cur, level, &bno, - (union xfs_btree_key *)&nkey, - &ncur, &i))) - return error; - nbno = be32_to_cpu(bno.s); - if (i) { - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = - xfs_btree_check_sblock(cur, - block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - nrec.ar_startblock = nkey.ar_startblock; - nrec.ar_blockcount = nkey.ar_blockcount; - } - /* - * Otherwise the insert fails. - */ - else { - *stat = 0; - return 0; - } - } - } - } - /* - * At this point we know there's room for our new entry in the block - * we're pointing at. - */ - numrecs = be16_to_cpu(block->bb_numrecs); - if (level > 0) { - /* - * It's a non-leaf entry. Make a hole for the new data - * in the key and ptr regions of the block. - */ - kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); - pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level))) - return error; - } -#endif - memmove(&kp[ptr], &kp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*kp)); - memmove(&pp[ptr], &pp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*pp)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, *bnop, level))) - return error; -#endif - /* - * Now stuff the new data in, bump numrecs and log the new data. - */ - kp[ptr - 1] = key; - pp[ptr - 1] = cpu_to_be32(*bnop); - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_alloc_log_keys(cur, bp, ptr, numrecs); - xfs_alloc_log_ptrs(cur, bp, ptr, numrecs); -#ifdef DEBUG - if (ptr < numrecs) - xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, - kp + ptr); -#endif - } else { - /* - * It's a leaf entry. Make a hole for the new record. - */ - rp = XFS_ALLOC_REC_ADDR(block, 1, cur); - memmove(&rp[ptr], &rp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*rp)); - /* - * Now stuff the new record in, bump numrecs - * and log the new data. - */ - rp[ptr - 1] = *recp; - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_alloc_log_recs(cur, bp, ptr, numrecs); -#ifdef DEBUG - if (ptr < numrecs) - xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, - rp + ptr); -#endif - } - /* - * Log the new number of records in the btree header. - */ - xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); - /* - * If we inserted at the start of a block, update the parents' keys. - */ - if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) - return error; - /* - * Look to see if the longest extent in the allocation group - * needs to be updated. - */ - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - if (level == 0 && - cur->bc_btnum == XFS_BTNUM_CNT && - be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) { - /* - * If this is a leaf in the by-size btree and there - * is no right sibling block and this block is bigger - * than the previous longest block, update it. - */ - agf->agf_longest = recp->ar_blockcount; - cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest - = be32_to_cpu(recp->ar_blockcount); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_LONGEST); - } - /* - * Return the new block number, if any. - * If there is one, give back a record value and a cursor too. - */ - *bnop = nbno; - if (nbno != NULLAGBLOCK) { - *recp = nrec; - *curp = ncur; - } - *stat = 1; - return 0; -} - /* * Log header fields from a btree block. */ @@ -1019,65 +769,6 @@ xfs_alloc_get_rec( return 0; } -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. - */ -int /* error */ -xfs_alloc_insert( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* result value, 0 for failure */ - int level; /* current level number in btree */ - xfs_agblock_t nbno; /* new block number (split result) */ - xfs_btree_cur_t *ncur; /* new cursor (split result) */ - xfs_alloc_rec_t nrec; /* record being inserted this level */ - xfs_btree_cur_t *pcur; /* previous level's cursor */ - - level = 0; - nbno = NULLAGBLOCK; - nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); - nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); - ncur = NULL; - pcur = cur; - /* - * Loop going up the tree, starting at the leaf level. - * Stop when we don't get a split block, that must mean that - * the insert is finished with this level. - */ - do { - /* - * Insert nrec/nbno into this level of the tree. - * Note if we fail, nbno will be null. - */ - if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur, - &i))) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - return error; - } - /* - * See if the cursor we just used is trash. - * Can't trash the caller's cursor, but otherwise we should - * if ncur is a new cursor or we're about to be done. - */ - if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) { - cur->bc_nlevels = pcur->bc_nlevels; - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - /* - * If we got a new cursor, switch to it. - */ - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (nbno != NULLAGBLOCK); - *stat = i; - return 0; -} STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( @@ -1170,6 +861,12 @@ xfs_allocbt_update_lastrec( return; len = rec->alloc.ar_blockcount; break; + case LASTREC_INSREC: + if (be32_to_cpu(rec->alloc.ar_blockcount) <= + be32_to_cpu(agf->agf_longest)) + return; + len = rec->alloc.ar_blockcount; + break; default: ASSERT(0); return; @@ -1199,6 +896,28 @@ xfs_allocbt_init_key_from_rec( key->alloc.ar_blockcount = rec->alloc.ar_blockcount; } +STATIC void +xfs_allocbt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(key->alloc.ar_startblock != 0); + + rec->alloc.ar_startblock = key->alloc.ar_startblock; + rec->alloc.ar_blockcount = key->alloc.ar_blockcount; +} + +STATIC void +xfs_allocbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + ASSERT(cur->bc_rec.a.ar_startblock != 0); + + rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); + rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); +} + STATIC void xfs_allocbt_init_ptr_from_cur( struct xfs_btree_cur *cur, @@ -1309,6 +1028,8 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_rec_from_key = xfs_allocbt_init_rec_from_key, + .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 81e2f360781..2e340ef8025 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -107,12 +107,6 @@ extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat); extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, xfs_extlen_t *len, int *stat); -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. - */ -extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat); - extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 315bc291268..85e2e8b9cf4 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -977,7 +977,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1053,7 +1053,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1143,7 +1143,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1198,7 +1198,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1651,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real( oldext))) goto done; cur->bc_rec.b = *new; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1741,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1789,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real( cur->bc_rec.b = PREV; cur->bc_rec.b.br_blockcount = new->br_startoff - PREV.br_startoff; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); /* @@ -1804,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real( XFS_WANT_CORRUPTED_GOTO(i == 0, done); /* new middle extent - newext */ cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -2264,7 +2264,7 @@ xfs_bmap_add_extent_hole_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -3303,7 +3303,7 @@ xfs_bmap_del_extent( if ((error = xfs_btree_increment(cur, 0, &i))) goto done; cur->bc_rec.b = new; - error = xfs_bmbt_insert(cur, &i); + error = xfs_btree_insert(cur, &i); if (error && error != ENOSPC) goto done; /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 204f276aeaa..2b15df32b7d 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -456,198 +456,6 @@ error0: return error; } -/* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. - */ -STATIC int /* error */ -xfs_bmbt_insrec( - xfs_btree_cur_t *cur, - int level, - xfs_fsblock_t *bnop, - xfs_bmbt_rec_t *recp, - xfs_btree_cur_t **curp, - int *stat) /* no-go/done/continue */ -{ - xfs_bmbt_block_t *block; /* bmap btree block */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ - int logflags; /* inode logging flags */ - xfs_fsblock_t nbno; /* new block number */ - struct xfs_btree_cur *ncur; /* new btree cursor */ - __uint64_t startoff; /* new btree key value */ - xfs_bmbt_rec_t nrec; /* new record count */ - int optr; /* old key/record index */ - xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ - int ptr; /* key/record index */ - xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */ - int numrecs; - - ASSERT(level < cur->bc_nlevels); - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp); - ncur = NULL; - key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp)); - optr = ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_bmbt_insrec); - block = xfs_bmbt_get_block(cur, level, &bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (ptr <= numrecs) { - if (level == 0) { - rp = XFS_BMAP_REC_IADDR(block, ptr, cur); - xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp); - } else { - kp = XFS_BMAP_KEY_IADDR(block, ptr, cur); - xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp); - } - } -#endif - nbno = NULLFSBLOCK; - if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) { - /* - * A root block, that can be made bigger. - */ - xfs_iroot_realloc(cur->bc_private.b.ip, 1, - cur->bc_private.b.whichfork); - block = xfs_bmbt_get_block(cur, level, &bp); - } else if (level == cur->bc_nlevels - 1) { - if ((error = xfs_btree_new_iroot(cur, &logflags, stat)) || - *stat == 0) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - logflags); - block = xfs_bmbt_get_block(cur, level, &bp); - } else { - if ((error = xfs_btree_rshift(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (i) { - /* nothing */ - } else { - if ((error = xfs_btree_lshift(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (i) { - optr = ptr = cur->bc_ptrs[level]; - } else { - union xfs_btree_ptr bno = { .l = cpu_to_be64(nbno) }; - union xfs_btree_key skey; - if ((error = xfs_btree_split(cur, level, - &bno, &skey, &ncur, - &i))) { - XFS_BMBT_TRACE_CURSOR(cur, - ERROR); - return error; - } - nbno = be64_to_cpu(bno.l); - startoff = be64_to_cpu(skey.bmbt.br_startoff); - if (i) { - block = xfs_bmbt_get_block( - cur, level, &bp); -#ifdef DEBUG - if ((error = - xfs_btree_check_lblock(cur, - block, level, bp))) { - XFS_BMBT_TRACE_CURSOR( - cur, ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[level]; - xfs_bmbt_disk_set_allf(&nrec, - startoff, 0, 0, - XFS_EXT_NORM); - } else { - XFS_BMBT_TRACE_CURSOR(cur, - EXIT); - *stat = 0; - return 0; - } - } - } - } - } - numrecs = be16_to_cpu(block->bb_numrecs); - if (level > 0) { - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); -#ifdef DEBUG - for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1], - level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memmove(&kp[ptr], &kp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*kp)); - memmove(&pp[ptr], &pp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*pp)); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, *bnop, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - kp[ptr - 1] = key; - pp[ptr - 1] = cpu_to_be64(*bnop); - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_bmbt_log_keys(cur, bp, ptr, numrecs); - xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs); - } else { - rp = XFS_BMAP_REC_IADDR(block, 1, cur); - memmove(&rp[ptr], &rp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*rp)); - rp[ptr - 1] = *recp; - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_bmbt_log_recs(cur, bp, ptr, numrecs); - } - xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS); -#ifdef DEBUG - if (ptr < numrecs) { - if (level == 0) - xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1, - rp + ptr); - else - xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1, - kp + ptr); - } -#endif - if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - *bnop = nbno; - if (nbno != NULLFSBLOCK) { - *recp = nrec; - *curp = ncur; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - STATIC int xfs_bmbt_killroot( xfs_btree_cur_t *cur) @@ -1059,67 +867,6 @@ xfs_bmbt_disk_get_startoff( XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } -/* - * Insert the current record at the point referenced by cur. - * - * A multi-level split of the tree on insert will invalidate the original - * cursor. All callers of this function should assume that the cursor is - * no longer valid and revalidate it. - */ -int /* error */ -xfs_bmbt_insert( - xfs_btree_cur_t *cur, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; - int level; - xfs_fsblock_t nbno; - xfs_btree_cur_t *ncur; - xfs_bmbt_rec_t nrec; - xfs_btree_cur_t *pcur; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - level = 0; - nbno = NULLFSBLOCK; - xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); - ncur = NULL; - pcur = cur; - do { - if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur, - &i))) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { - cur->bc_nlevels = pcur->bc_nlevels; - cur->bc_private.b.allocated += - pcur->bc_private.b.allocated; - pcur->bc_private.b.allocated = 0; - ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) || - XFS_IS_REALTIME_INODE(cur->bc_private.b.ip)); - cur->bc_private.b.firstblock = - pcur->bc_private.b.firstblock; - ASSERT(cur->bc_private.b.flist == - pcur->bc_private.b.flist); - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (nbno != NULLFSBLOCK); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = i; - return 0; -error0: - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; -} - /* * Log fields from the btree block header. */ @@ -1450,6 +1197,21 @@ xfs_bmbt_dup_cursor( return new; } +STATIC void +xfs_bmbt_update_cursor( + struct xfs_btree_cur *src, + struct xfs_btree_cur *dst) +{ + ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || + (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); + ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); + + dst->bc_private.b.allocated += src->bc_private.b.allocated; + dst->bc_private.b.firstblock = src->bc_private.b.firstblock; + + src->bc_private.b.allocated = 0; +} + STATIC int xfs_bmbt_alloc_block( struct xfs_btree_cur *cur, @@ -1544,6 +1306,23 @@ xfs_bmbt_get_maxrecs( return XFS_BMAP_BLOCK_IMAXRECS(level, cur); } +/* + * Get the maximum records we could store in the on-disk format. + * + * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but + * for the root node this checks the available space in the dinode fork + * so that we can resize the in-memory buffer to match it. After a + * resize to the maximum size this function returns the same value + * as xfs_bmbt_get_maxrecs for the root node, too. + */ +STATIC int +xfs_bmbt_get_dmaxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return XFS_BMAP_BLOCK_DMAXRECS(level, cur); +} + STATIC void xfs_bmbt_init_key_from_rec( union xfs_btree_key *key, @@ -1553,6 +1332,25 @@ xfs_bmbt_init_key_from_rec( cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); } +STATIC void +xfs_bmbt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(key->bmbt.br_startoff != 0); + + xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff), + 0, 0, XFS_EXT_NORM); +} + +STATIC void +xfs_bmbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); +} + STATIC void xfs_bmbt_init_ptr_from_cur( struct xfs_btree_cur *cur, @@ -1660,9 +1458,13 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .key_len = sizeof(xfs_bmbt_key_t), .dup_cursor = xfs_bmbt_dup_cursor, + .update_cursor = xfs_bmbt_update_cursor, .alloc_block = xfs_bmbt_alloc_block, .get_maxrecs = xfs_bmbt_get_maxrecs, + .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, + .init_rec_from_key = xfs_bmbt_init_rec_from_key, + .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 26fd8ace3e7..703fe2e3434 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -250,7 +250,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); -extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *); extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 3b6e01dea66..36477aae77d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -963,6 +963,17 @@ xfs_btree_ptr_is_null( return be32_to_cpu(ptr->s) == NULLAGBLOCK; } +STATIC void +xfs_btree_set_ptr_null( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(NULLFSBLOCK); + else + ptr->s = cpu_to_be32(NULLAGBLOCK); +} + /* * Get/set/init sibling pointers */ @@ -2697,3 +2708,354 @@ out0: *stat = 0; return 0; } + +STATIC int +xfs_btree_make_block_unfull( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* btree level */ + int numrecs,/* # of recs in block */ + int *oindex,/* old tree index */ + int *index, /* new tree index */ + union xfs_btree_ptr *nptr, /* new btree ptr */ + struct xfs_btree_cur **ncur, /* new btree cursor */ + union xfs_btree_rec *nrec, /* new record */ + int *stat) +{ + union xfs_btree_key key; /* new btree key value */ + int error = 0; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) { + struct xfs_inode *ip = cur->bc_private.b.ip; + + if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { + /* A root block that can be made bigger. */ + + xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); + } else { + /* A root block that needs replacing */ + int logflags = 0; + + error = xfs_btree_new_iroot(cur, &logflags, stat); + if (error || *stat == 0) + return error; + + xfs_trans_log_inode(cur->bc_tp, ip, logflags); + } + + return 0; + } + + /* First, try shifting an entry to the right neighbor. */ + error = xfs_btree_rshift(cur, level, stat); + if (error || *stat) + return error; + + /* Next, try shifting an entry to the left neighbor. */ + error = xfs_btree_lshift(cur, level, stat); + if (error) + return error; + + if (*stat) { + *oindex = *index = cur->bc_ptrs[level]; + return 0; + } + + /* + * Next, try splitting the current block in half. + * + * If this works we have to re-set our variables because we + * could be in a different block now. + */ + error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); + if (error || *stat == 0) + return error; + + + *index = cur->bc_ptrs[level]; + cur->bc_ops->init_rec_from_key(&key, nrec); + return 0; +} + +/* + * Insert one record/level. Return information to the caller + * allowing the next level up to proceed if necessary. + */ +STATIC int +xfs_btree_insrec( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level to insert record at */ + union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ + union xfs_btree_rec *recp, /* i/o: record data inserted */ + struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer for block */ + union xfs_btree_key key; /* btree key */ + union xfs_btree_ptr nptr; /* new block ptr */ + struct xfs_btree_cur *ncur; /* new btree cursor */ + union xfs_btree_rec nrec; /* new record count */ + int optr; /* old key/record index */ + int ptr; /* key/record index */ + int numrecs;/* number of records */ + int error; /* error return value */ +#ifdef DEBUG + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); + + ncur = NULL; + + /* + * If we have an external root pointer, and we've made it to the + * root level, allocate a new root block and we're done. + */ + if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level >= cur->bc_nlevels)) { + error = xfs_btree_new_root(cur, stat); + xfs_btree_set_ptr_null(cur, ptrp); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return error; + } + + /* If we're off the left edge, return failure. */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Make a key out of the record data to be inserted, and save it. */ + cur->bc_ops->init_key_from_rec(&key, recp); + + optr = ptr; + + XFS_BTREE_STATS_INC(cur, insrec); + + /* Get pointers to the btree buffer and block. */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; + + /* Check that the new entry is being inserted in the right place. */ + if (ptr <= numrecs) { + if (level == 0) { + xfs_btree_check_rec(cur->bc_btnum, recp, + xfs_btree_rec_addr(cur, ptr, block)); + } else { + xfs_btree_check_key(cur->bc_btnum, &key, + xfs_btree_key_addr(cur, ptr, block)); + } + } +#endif + + /* + * If the block is full, we can't insert the new entry until we + * make the block un-full. + */ + xfs_btree_set_ptr_null(cur, &nptr); + if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { + error = xfs_btree_make_block_unfull(cur, level, numrecs, + &optr, &ptr, &nptr, &ncur, &nrec, stat); + if (error || *stat == 0) + goto error0; + } + + /* + * The current block may have changed if the block was + * previously full and we have just made space in it. + */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + return error; +#endif + + /* + * At this point we know there's room for our new entry in the block + * we're pointing at. + */ + XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); + + if (level > 0) { + /* It's a nonleaf. make a hole in the keys and ptrs */ + union xfs_btree_key *kp; + union xfs_btree_ptr *pp; + + kp = xfs_btree_key_addr(cur, ptr, block); + pp = xfs_btree_ptr_addr(cur, ptr, block); + +#ifdef DEBUG + for (i = numrecs - ptr; i >= 0; i--) { + error = xfs_btree_check_ptr(cur, pp, i, level); + if (error) + return error; + } +#endif + + xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); + xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, ptrp, 0, level); + if (error) + goto error0; +#endif + + /* Now put the new data in, bump numrecs and log it. */ + xfs_btree_copy_keys(cur, kp, &key, 1); + xfs_btree_copy_ptrs(cur, pp, ptrp, 1); + numrecs++; + xfs_btree_set_numrecs(block, numrecs); + xfs_btree_log_ptrs(cur, bp, ptr, numrecs); + xfs_btree_log_keys(cur, bp, ptr, numrecs); +#ifdef DEBUG + if (ptr < numrecs) { + xfs_btree_check_key(cur->bc_btnum, kp, + xfs_btree_key_addr(cur, ptr + 1, block)); + } +#endif + } else { + /* It's a leaf. make a hole in the records */ + union xfs_btree_rec *rp; + + rp = xfs_btree_rec_addr(cur, ptr, block); + + xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); + + /* Now put the new data in, bump numrecs and log it. */ + xfs_btree_copy_recs(cur, rp, recp, 1); + xfs_btree_set_numrecs(block, ++numrecs); + xfs_btree_log_recs(cur, bp, ptr, numrecs); +#ifdef DEBUG + if (ptr < numrecs) { + xfs_btree_check_rec(cur->bc_btnum, rp, + xfs_btree_rec_addr(cur, ptr + 1, block)); + } +#endif + } + + /* Log the new number of records in the btree header. */ + xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); + + /* If we inserted at the start of a block, update the parents' keys. */ + if (optr == 1) { + error = xfs_btree_updkey(cur, &key, level + 1); + if (error) + goto error0; + } + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, level)) { + cur->bc_ops->update_lastrec(cur, block, recp, + ptr, LASTREC_INSREC); + } + + /* + * Return the new block number, if any. + * If there is one, give back a record value and a cursor too. + */ + *ptrp = nptr; + if (!xfs_btree_ptr_is_null(cur, &nptr)) { + *recp = nrec; + *curp = ncur; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Insert the record at the point referenced by cur. + * + * A multi-level split of the tree on insert will invalidate the original + * cursor. All callers of this function should assume that the cursor is + * no longer valid and revalidate it. + */ +int +xfs_btree_insert( + struct xfs_btree_cur *cur, + int *stat) +{ + int error; /* error return value */ + int i; /* result value, 0 for failure */ + int level; /* current level number in btree */ + union xfs_btree_ptr nptr; /* new block number (split result) */ + struct xfs_btree_cur *ncur; /* new cursor (split result) */ + struct xfs_btree_cur *pcur; /* previous level's cursor */ + union xfs_btree_rec rec; /* record to insert */ + + level = 0; + ncur = NULL; + pcur = cur; + + xfs_btree_set_ptr_null(cur, &nptr); + cur->bc_ops->init_rec_from_cur(cur, &rec); + + /* + * Loop going up the tree, starting at the leaf level. + * Stop when we don't get a split block, that must mean that + * the insert is finished with this level. + */ + do { + /* + * Insert nrec/nptr into this level of the tree. + * Note if we fail, nptr will be null. + */ + error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); + if (error) { + if (pcur != cur) + xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); + goto error0; + } + + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + level++; + + /* + * See if the cursor we just used is trash. + * Can't trash the caller's cursor, but otherwise we should + * if ncur is a new cursor or we're about to be done. + */ + if (pcur != cur && + (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { + /* Save the state from the cursor before we trash it */ + if (cur->bc_ops->update_cursor) + cur->bc_ops->update_cursor(pcur, cur); + cur->bc_nlevels = pcur->bc_nlevels; + xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); + } + /* If we got a new cursor, switch to it. */ + if (ncur) { + pcur = ncur; + ncur = NULL; + } + } while (!xfs_btree_ptr_is_null(cur, &nptr)); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = i; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 21eec863f00..6f03871f599 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -186,6 +186,8 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + void (*update_cursor)(struct xfs_btree_cur *src, + struct xfs_btree_cur *dst); /* update btree root pointer */ void (*set_root)(struct xfs_btree_cur *cur, @@ -206,9 +208,16 @@ struct xfs_btree_ops { /* records in block/level */ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); + /* records on disk. Matter for the root in inode case. */ + int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level); + /* init values of btree structures */ void (*init_key_from_rec)(union xfs_btree_key *key, union xfs_btree_rec *rec); + void (*init_rec_from_key)(union xfs_btree_key *key, + union xfs_btree_rec *rec); + void (*init_rec_from_cur)(struct xfs_btree_cur *cur, + union xfs_btree_rec *rec); void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr); @@ -240,6 +249,7 @@ struct xfs_btree_ops { * Reasons for the update_lastrec method to be called. */ #define LASTREC_UPDATE 0 +#define LASTREC_INSREC 1 /* @@ -549,6 +559,7 @@ int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); +int xfs_btree_insert(struct xfs_btree_cur *, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 138651afd44..b68e73bb17c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -418,7 +418,7 @@ xfs_ialloc_ag_alloc( return error; } ASSERT(i == 0); - if ((error = xfs_inobt_insert(cur, &i))) { + if ((error = xfs_btree_insert(cur, &i))) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 7ba3c7bb398..8f66e272056 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -514,228 +514,6 @@ error0: return error; } -/* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. - */ -STATIC int /* error */ -xfs_inobt_insrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to insert record at */ - xfs_agblock_t *bnop, /* i/o: block number inserted */ - xfs_inobt_rec_t *recp, /* i/o: record data inserted */ - xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ - int *stat) /* success/failure */ -{ - xfs_inobt_block_t *block; /* btree block record/key lives in */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_inobt_key_t key; /* key value being inserted */ - xfs_inobt_key_t *kp=NULL; /* pointer to btree keys */ - xfs_agblock_t nbno; /* block number of allocated block */ - xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ - xfs_inobt_key_t nkey; /* new key value, from split */ - xfs_inobt_rec_t nrec; /* new record value, for caller */ - int numrecs; - int optr; /* old ptr value */ - xfs_inobt_ptr_t *pp; /* pointer to btree addresses */ - int ptr; /* index in btree block for this rec */ - xfs_inobt_rec_t *rp=NULL; /* pointer to btree records */ - - /* - * GCC doesn't understand the (arguably complex) control flow in - * this function and complains about uninitialized structure fields - * without this. - */ - memset(&nrec, 0, sizeof(nrec)); - - /* - * If we made it to the root level, allocate a new root block - * and we're done. - */ - if (level >= cur->bc_nlevels) { - error = xfs_btree_new_root(cur, &i); - *bnop = NULLAGBLOCK; - *stat = i; - return error; - } - /* - * Make a key out of the record data to be inserted, and save it. - */ - key.ir_startino = recp->ir_startino; - optr = ptr = cur->bc_ptrs[level]; - /* - * If we're off the left edge, return failure. - */ - if (ptr == 0) { - *stat = 0; - return 0; - } - /* - * Get pointers to the btree buffer and block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; - /* - * Check that the new entry is being inserted in the right place. - */ - if (ptr <= numrecs) { - if (level == 0) { - rp = XFS_INOBT_REC_ADDR(block, ptr, cur); - xfs_btree_check_rec(cur->bc_btnum, recp, rp); - } else { - kp = XFS_INOBT_KEY_ADDR(block, ptr, cur); - xfs_btree_check_key(cur->bc_btnum, &key, kp); - } - } -#endif - nbno = NULLAGBLOCK; - ncur = NULL; - /* - * If the block is full, we can't insert the new entry until we - * make the block un-full. - */ - if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - /* - * First, try shifting an entry to the right neighbor. - */ - if ((error = xfs_btree_rshift(cur, level, &i))) - return error; - if (i) { - /* nothing */ - } - /* - * Next, try shifting an entry to the left neighbor. - */ - else { - if ((error = xfs_btree_lshift(cur, level, &i))) - return error; - if (i) { - optr = ptr = cur->bc_ptrs[level]; - } else { - union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; - /* - * Next, try splitting the current block - * in half. If this works we have to - * re-set our variables because - * we could be in a different block now. - */ - if ((error = xfs_btree_split(cur, level, &bno, - (union xfs_btree_key *)&nkey, - &ncur, &i))) - return error; - nbno = be32_to_cpu(bno.s); - if (i) { - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, - block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - nrec.ir_startino = nkey.ir_startino; - } else { - /* - * Otherwise the insert fails. - */ - *stat = 0; - return 0; - } - } - } - } - /* - * At this point we know there's room for our new entry in the block - * we're pointing at. - */ - numrecs = be16_to_cpu(block->bb_numrecs); - if (level > 0) { - /* - * It's a non-leaf entry. Make a hole for the new data - * in the key and ptr regions of the block. - */ - kp = XFS_INOBT_KEY_ADDR(block, 1, cur); - pp = XFS_INOBT_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level))) - return error; - } -#endif - memmove(&kp[ptr], &kp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*kp)); - memmove(&pp[ptr], &pp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*pp)); - /* - * Now stuff the new data in, bump numrecs and log the new data. - */ -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, *bnop, level))) - return error; -#endif - kp[ptr - 1] = key; - pp[ptr - 1] = cpu_to_be32(*bnop); - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_inobt_log_keys(cur, bp, ptr, numrecs); - xfs_inobt_log_ptrs(cur, bp, ptr, numrecs); - } else { - /* - * It's a leaf entry. Make a hole for the new record. - */ - rp = XFS_INOBT_REC_ADDR(block, 1, cur); - memmove(&rp[ptr], &rp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*rp)); - /* - * Now stuff the new record in, bump numrecs - * and log the new data. - */ - rp[ptr - 1] = *recp; - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_inobt_log_recs(cur, bp, ptr, numrecs); - } - /* - * Log the new number of records in the btree header. - */ - xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); -#ifdef DEBUG - /* - * Check that the key/record is in the right place, now. - */ - if (ptr < numrecs) { - if (level == 0) - xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, - rp + ptr); - else - xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, - kp + ptr); - } -#endif - /* - * If we inserted at the start of a block, update the parents' keys. - */ - if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) - return error; - /* - * Return the new block number, if any. - * If there is one, give back a record value and a cursor too. - */ - *bnop = nbno; - if (nbno != NULLAGBLOCK) { - *recp = nrec; - *curp = ncur; - } - *stat = 1; - return 0; -} - /* * Log header fields from a btree block. */ @@ -912,66 +690,6 @@ xfs_inobt_get_rec( return 0; } -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. - */ -int /* error */ -xfs_inobt_insert( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* result value, 0 for failure */ - int level; /* current level number in btree */ - xfs_agblock_t nbno; /* new block number (split result) */ - xfs_btree_cur_t *ncur; /* new cursor (split result) */ - xfs_inobt_rec_t nrec; /* record being inserted this level */ - xfs_btree_cur_t *pcur; /* previous level's cursor */ - - level = 0; - nbno = NULLAGBLOCK; - nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); - nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); - nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); - ncur = NULL; - pcur = cur; - /* - * Loop going up the tree, starting at the leaf level. - * Stop when we don't get a split block, that must mean that - * the insert is finished with this level. - */ - do { - /* - * Insert nrec/nbno into this level of the tree. - * Note if we fail, nbno will be null. - */ - if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur, - &i))) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - return error; - } - /* - * See if the cursor we just used is trash. - * Can't trash the caller's cursor, but otherwise we should - * if ncur is a new cursor or we're about to be done. - */ - if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) { - cur->bc_nlevels = pcur->bc_nlevels; - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - /* - * If we got a new cursor, switch to it. - */ - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (nbno != NULLAGBLOCK); - *stat = i; - return 0; -} STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( @@ -1053,6 +771,24 @@ xfs_inobt_init_key_from_rec( key->inobt.ir_startino = rec->inobt.ir_startino; } +STATIC void +xfs_inobt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + rec->inobt.ir_startino = key->inobt.ir_startino; +} + +STATIC void +xfs_inobt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); + rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); + rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); +} + /* * intial value of ptr for lookup */ @@ -1152,6 +888,8 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .alloc_block = xfs_inobt_alloc_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_rec_from_key = xfs_inobt_init_rec_from_key, + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 7f77549e82a..c9cbc4f2168 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -129,12 +129,6 @@ extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat); extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, __int32_t *fcnt, xfs_inofree_t *free, int *stat); -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. - */ -extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat); - extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); -- cgit v1.2.3 From d4b3a4b7dd62f2e111d4d0afa9ef3f9b6cd955c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:51 +1100 Subject: [XFS] move xfs_bmbt_killroot to common code xfs_bmbt_killroot is a mostly generic implementation of moving from a real block based root to an inode based root. So move it to xfs_btree.c where it can use all the nice infrastructure there and make it pointer size agnostic The new name for it is xfs_btree_kill_iroot, following the old naming but making it clear we're dealing with the root in inode case here, and to avoid confusion with xfs_btree_new_root which is used for the not inode rooted case. I've also added a comment describing what it does and why it's named the way it is. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32203a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 32 +++++++++++++ fs/xfs/xfs_bmap_btree.c | 116 +++++++++------------------------------------- fs/xfs/xfs_btree.c | 112 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 2 + fs/xfs/xfs_ialloc_btree.c | 17 +++++++ 5 files changed, 185 insertions(+), 94 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 818adca77fc..f124ddd91c0 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -834,6 +834,37 @@ xfs_allocbt_alloc_block( return 0; } +STATIC int +xfs_allocbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agblock_t bno; + int error; + + bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp)); + error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); + if (error) + return error; + + /* + * Since blocks move to the free list without the coordination used in + * xfs_bmap_finish, we can't allow block to be available for + * reallocation and non-transaction writing (user data) until we know + * that the transaction that moved it to the free list is permanently + * on disk. We track the blocks by declaring these blocks as "busy"; + * the busy list is maintained on a per-ag basis and each transaction + * records which entries should be removed when the iclog commits to + * disk. If a busy block is allocated, the iclog is pushed up to the + * LSN that freed the block. + */ + xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); + xfs_trans_agbtree_delta(cur->bc_tp, -1); + return 0; +} + /* * Update the longest extent in the AGF */ @@ -1025,6 +1056,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, + .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 2b15df32b7d..6b7774ebc26 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -49,7 +49,6 @@ */ -STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); @@ -194,7 +193,7 @@ xfs_bmbt_delrec( if (level == cur->bc_nlevels - 1) { xfs_iroot_realloc(cur->bc_private.b.ip, -1, cur->bc_private.b.whichfork); - if ((error = xfs_bmbt_killroot(cur))) { + if ((error = xfs_btree_kill_iroot(cur))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -228,7 +227,7 @@ xfs_bmbt_delrec( */ if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK && level == cur->bc_nlevels - 2) { - if ((error = xfs_bmbt_killroot(cur))) { + if ((error = xfs_btree_kill_iroot(cur))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -456,97 +455,6 @@ error0: return error; } -STATIC int -xfs_bmbt_killroot( - xfs_btree_cur_t *cur) -{ - xfs_bmbt_block_t *block; - xfs_bmbt_block_t *cblock; - xfs_buf_t *cbp; - xfs_bmbt_key_t *ckp; - xfs_bmbt_ptr_t *cpp; -#ifdef DEBUG - int error; -#endif - int i; - xfs_bmbt_key_t *kp; - xfs_inode_t *ip; - xfs_ifork_t *ifp; - int level; - xfs_bmbt_ptr_t *pp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - level = cur->bc_nlevels - 1; - ASSERT(level >= 1); - /* - * Don't deal with the root block needs to be a leaf case. - * We're just going to turn the thing back into extents anyway. - */ - if (level == 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - block = xfs_bmbt_get_block(cur, level, &cbp); - /* - * Give up if the root has multiple children. - */ - if (be16_to_cpu(block->bb_numrecs) != 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - /* - * Only do this if the next level will fit. - * Then the data must be copied up to the inode, - * instead of freeing the root you free the next level. - */ - cbp = cur->bc_bufs[level - 1]; - cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); - if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO); - ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO); - ip = cur->bc_private.b.ip; - ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork); - ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) == - XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes)); - i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur)); - if (i) { - xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork); - block = ifp->if_broot; - } - be16_add_cpu(&block->bb_numrecs, i); - ASSERT(block->bb_numrecs == cblock->bb_numrecs); - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur); - memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp)); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); - cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp)); - xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1, - cur->bc_private.b.flist, cur->bc_mp); - ip->i_d.di_nblocks--; - XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip, - XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(cur->bc_tp, cbp); - cur->bc_bufs[level - 1] = NULL; - be16_add_cpu(&block->bb_level, -1); - xfs_trans_log_inode(cur->bc_tp, ip, - XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - cur->bc_nlevels--; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; -} - /* * Log key values from the btree block. */ @@ -1298,6 +1206,25 @@ xfs_bmbt_alloc_block( return error; } +STATIC int +xfs_bmbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_trans *tp = cur->bc_tp; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); + + xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp); + ip->i_d.di_nblocks--; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, bp); + return 0; +} + STATIC int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, @@ -1460,6 +1387,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, .update_cursor = xfs_bmbt_update_cursor, .alloc_block = xfs_bmbt_alloc_block, + .free_block = xfs_bmbt_free_block, .get_maxrecs = xfs_bmbt_get_maxrecs, .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 36477aae77d..75a8a7b00df 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -3059,3 +3059,115 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Try to merge a non-leaf block back into the inode root. + * + * Note: the killroot names comes from the fact that we're effectively + * killing the old root block. But because we can't just delete the + * inode we have to copy the single block it was pointing to into the + * inode. + */ +int +xfs_btree_kill_iroot( + struct xfs_btree_cur *cur) +{ + int whichfork = cur->bc_private.b.whichfork; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_block *block; + struct xfs_btree_block *cblock; + union xfs_btree_key *kp; + union xfs_btree_key *ckp; + union xfs_btree_ptr *pp; + union xfs_btree_ptr *cpp; + struct xfs_buf *cbp; + int level; + int index; + int numrecs; +#ifdef DEBUG + union xfs_btree_ptr ptr; + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_nlevels > 1); + + /* + * Don't deal with the root block needs to be a leaf case. + * We're just going to turn the thing back into extents anyway. + */ + level = cur->bc_nlevels - 1; + if (level == 1) + goto out0; + + /* + * Give up if the root has multiple children. + */ + block = xfs_btree_get_iroot(cur); + if (xfs_btree_get_numrecs(block) != 1) + goto out0; + + cblock = xfs_btree_get_block(cur, level - 1, &cbp); + numrecs = xfs_btree_get_numrecs(cblock); + + /* + * Only do this if the next level will fit. + * Then the data must be copied up to the inode, + * instead of freeing the root you free the next level. + */ + if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) + goto out0; + + XFS_BTREE_STATS_INC(cur, killroot); + +#ifdef DEBUG + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); +#endif + + index = numrecs - cur->bc_ops->get_maxrecs(cur, level); + if (index) { + xfs_iroot_realloc(cur->bc_private.b.ip, index, + cur->bc_private.b.whichfork); + block = (struct xfs_btree_block *)ifp->if_broot; + } + + be16_add_cpu(&block->bb_numrecs, index); + ASSERT(block->bb_numrecs == cblock->bb_numrecs); + + kp = xfs_btree_key_addr(cur, 1, block); + ckp = xfs_btree_key_addr(cur, 1, cblock); + xfs_btree_copy_keys(cur, kp, ckp, numrecs); + + pp = xfs_btree_ptr_addr(cur, 1, block); + cpp = xfs_btree_ptr_addr(cur, 1, cblock); +#ifdef DEBUG + for (i = 0; i < numrecs; i++) { + int error; + + error = xfs_btree_check_ptr(cur, cpp, i, level - 1); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + } +#endif + xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); + + cur->bc_ops->free_block(cur, cbp); + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level - 1] = NULL; + be16_add_cpu(&block->bb_level, -1); + xfs_trans_log_inode(cur->bc_tp, ip, + XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); + cur->bc_nlevels--; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 6f03871f599..ff2552febba 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -198,6 +198,7 @@ struct xfs_btree_ops { union xfs_btree_ptr *start_bno, union xfs_btree_ptr *new_bno, int length, int *stat); + int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); /* update last record information */ void (*update_lastrec)(struct xfs_btree_cur *cur, @@ -559,6 +560,7 @@ int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); +int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 8f66e272056..90f1d4ee772 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -754,6 +754,22 @@ xfs_inobt_alloc_block( return 0; } +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + xfs_fsblock_t fsbno; + int error; + + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); + error = xfs_free_extent(cur->bc_tp, fsbno, 1); + if (error) + return error; + + xfs_trans_binval(cur->bc_tp, bp); + return error; +} STATIC int xfs_inobt_get_maxrecs( @@ -886,6 +902,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, .alloc_block = xfs_inobt_alloc_block, + .free_block = xfs_inobt_free_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, .init_rec_from_key = xfs_inobt_init_rec_from_key, -- cgit v1.2.3 From 91cca5df9bc85efdabfa645f51d54259ed09f4bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:01 +1100 Subject: [XFS] implement generic xfs_btree_delete/delrec Make the btree delete code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32205a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 14 +- fs/xfs/xfs_alloc_btree.c | 744 ++++------------------------------------------ fs/xfs/xfs_alloc_btree.h | 7 - fs/xfs/xfs_bmap.c | 14 +- fs/xfs/xfs_bmap_btree.c | 525 +------------------------------- fs/xfs/xfs_bmap_btree.h | 3 - fs/xfs/xfs_btree.c | 593 ++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 5 + fs/xfs/xfs_ialloc.c | 4 +- fs/xfs/xfs_ialloc_btree.c | 646 +++------------------------------------- fs/xfs/xfs_ialloc_btree.h | 7 - 11 files changed, 723 insertions(+), 1839 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a983824c12b..e9c70249d2c 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -398,7 +398,7 @@ xfs_alloc_fixup_trees( /* * Delete the entry from the by-size btree. */ - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); /* @@ -427,7 +427,7 @@ xfs_alloc_fixup_trees( /* * No remaining freespace, just delete the by-block tree entry. */ - if ((error = xfs_alloc_delete(bno_cur, &i))) + if ((error = xfs_btree_delete(bno_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } else { @@ -1651,7 +1651,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1660,13 +1660,13 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* * Delete the old by-block entry for the right block. */ - if ((error = xfs_alloc_delete(bno_cur, &i))) + if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1711,7 +1711,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1737,7 +1737,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f124ddd91c0..d256b51f913 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -40,691 +40,6 @@ #include "xfs_alloc.h" #include "xfs_error.h" -/* - * Prototypes for internal functions. - */ - -STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); -STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); - -/* - * Internal functions. - */ - -/* - * Single level of the xfs_alloc_delete record deletion routine. - * Delete record pointed to by cur/level. - * Remove the record from its block then rebalance the tree. - * Return 0 for error, 1 for done, 2 to go on to the next level. - */ -STATIC int /* error */ -xfs_alloc_delrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level removing record from */ - int *stat) /* fail/done/go-on */ -{ - xfs_agf_t *agf; /* allocation group freelist header */ - xfs_alloc_block_t *block; /* btree block record/key lives in */ - xfs_agblock_t bno; /* btree block number */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_alloc_key_t key; /* kp points here if block is level 0 */ - xfs_agblock_t lbno; /* left block's block number */ - xfs_buf_t *lbp; /* left block's buffer pointer */ - xfs_alloc_block_t *left; /* left btree block */ - xfs_alloc_key_t *lkp=NULL; /* left block key pointer */ - xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */ - int lrecs=0; /* number of records in left block */ - xfs_alloc_rec_t *lrp; /* left block record pointer */ - xfs_mount_t *mp; /* mount structure */ - int ptr; /* index in btree block for this rec */ - xfs_agblock_t rbno; /* right block's block number */ - xfs_buf_t *rbp; /* right block's buffer pointer */ - xfs_alloc_block_t *right; /* right btree block */ - xfs_alloc_key_t *rkp; /* right block key pointer */ - xfs_alloc_ptr_t *rpp; /* right block address pointer */ - int rrecs=0; /* number of records in right block */ - int numrecs; - xfs_alloc_rec_t *rrp; /* right block record pointer */ - xfs_btree_cur_t *tcur; /* temporary btree cursor */ - - /* - * Get the index of the entry being deleted, check for nothing there. - */ - ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - *stat = 0; - return 0; - } - /* - * Get the buffer & block containing the record or key/ptr. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Fail if we're off the end of the block. - */ - numrecs = be16_to_cpu(block->bb_numrecs); - if (ptr > numrecs) { - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_abt_delrec); - /* - * It's a nonleaf. Excise the key and ptr being deleted, by - * sliding the entries past them down one. - * Log the changed areas of the block. - */ - if (level > 0) { - lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur); - lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) - return error; - } -#endif - if (ptr < numrecs) { - memmove(&lkp[ptr - 1], &lkp[ptr], - (numrecs - ptr) * sizeof(*lkp)); - memmove(&lpp[ptr - 1], &lpp[ptr], - (numrecs - ptr) * sizeof(*lpp)); - xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1); - xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1); - } - } - /* - * It's a leaf. Excise the record being deleted, by sliding the - * entries past it down one. Log the changed areas of the block. - */ - else { - lrp = XFS_ALLOC_REC_ADDR(block, 1, cur); - if (ptr < numrecs) { - memmove(&lrp[ptr - 1], &lrp[ptr], - (numrecs - ptr) * sizeof(*lrp)); - xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1); - } - /* - * If it's the first record in the block, we'll need a key - * structure to pass up to the next level (updkey). - */ - if (ptr == 1) { - key.ar_startblock = lrp->ar_startblock; - key.ar_blockcount = lrp->ar_blockcount; - lkp = &key; - } - } - /* - * Decrement and log the number of entries in the block. - */ - numrecs--; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); - /* - * See if the longest free extent in the allocation group was - * changed by this operation. True if it's the by-size btree, and - * this is the leaf level, and there is no right sibling block, - * and this was the last record. - */ - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - mp = cur->bc_mp; - - if (level == 0 && - cur->bc_btnum == XFS_BTNUM_CNT && - be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - ptr > numrecs) { - ASSERT(ptr == numrecs + 1); - /* - * There are still records in the block. Grab the size - * from the last one. - */ - if (numrecs) { - rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); - agf->agf_longest = rrp->ar_blockcount; - } - /* - * No free extents left. - */ - else - agf->agf_longest = 0; - mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest = - be32_to_cpu(agf->agf_longest); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_LONGEST); - } - /* - * Is this the root level? If so, we're almost done. - */ - if (level == cur->bc_nlevels - 1) { - /* - * If this is the root level, - * and there's only one entry left, - * and it's NOT the leaf level, - * then we can get rid of this level. - */ - if (numrecs == 1 && level > 0) { - /* - * lpp is still set to the first pointer in the block. - * Make it the new root of the btree. - */ - bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]); - agf->agf_roots[cur->bc_btnum] = *lpp; - be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1); - mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--; - /* - * Put this buffer/block on the ag's freelist. - */ - error = xfs_alloc_put_freelist(cur->bc_tp, - cur->bc_private.a.agbp, NULL, bno, 1); - if (error) - return error; - /* - * Since blocks move to the free list without the - * coordination used in xfs_bmap_finish, we can't allow - * block to be available for reallocation and - * non-transaction writing (user data) until we know - * that the transaction that moved it to the free list - * is permanently on disk. We track the blocks by - * declaring these blocks as "busy"; the busy list is - * maintained on a per-ag basis and each transaction - * records which entries should be removed when the - * iclog commits to disk. If a busy block is - * allocated, the iclog is pushed up to the LSN - * that freed the block. - */ - xfs_alloc_mark_busy(cur->bc_tp, - be32_to_cpu(agf->agf_seqno), bno, 1); - - xfs_trans_agbtree_delta(cur->bc_tp, -1); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_ROOTS | XFS_AGF_LEVELS); - /* - * Update the cursor so there's one fewer level. - */ - xfs_btree_setbuf(cur, level, NULL); - cur->bc_nlevels--; - } else if (level > 0 && - (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * If we deleted the leftmost entry in the block, update the - * key values above us in the tree. - */ - if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)lkp, level + 1))) - return error; - /* - * If the number of records remaining in the block is at least - * the minimum, we're done. - */ - if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * Otherwise, we have to move some records around to keep the - * tree balanced. Look at the left and right sibling blocks to - * see if we can re-balance by moving only one record. - */ - rbno = be32_to_cpu(block->bb_rightsib); - lbno = be32_to_cpu(block->bb_leftsib); - bno = NULLAGBLOCK; - ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK); - /* - * Duplicate the cursor so our btree manipulations here won't - * disrupt the next level up. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - /* - * If there's a right sibling, see if it's ok to shift an entry - * out of it. - */ - if (rbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the last entry in the next block. - * Actually any entry but the first would suffice. - */ - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Grab a pointer to the block. - */ - rbp = tcur->bc_bufs[level]; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(right->bb_leftsib); - /* - * If right block is full enough so that removing one entry - * won't make it too empty, and left-shifting an entry out - * of right to us works, we're done. - */ - if (be16_to_cpu(right->bb_numrecs) - 1 >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_lshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level > 0 && - (error = xfs_btree_decrement(cur, level, - &i))) - return error; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference, and fix up the temp cursor to point - * to our block again (last record). - */ - rrecs = be16_to_cpu(right->bb_numrecs); - if (lbno != NULLAGBLOCK) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - } - /* - * If there's a left sibling, see if it's ok to shift an entry - * out of it. - */ - if (lbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the first entry in the - * previous block. - */ - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_btree_firstrec(tcur, level); - /* - * Grab a pointer to the block. - */ - lbp = tcur->bc_bufs[level]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(left->bb_rightsib); - /* - * If left block is full enough so that removing one entry - * won't make it too empty, and right-shifting an entry out - * of left to us works, we're done. - */ - if (be16_to_cpu(left->bb_numrecs) - 1 >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_rshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level == 0) - cur->bc_ptrs[0]++; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference. - */ - lrecs = be16_to_cpu(left->bb_numrecs); - } - /* - * Delete the temp cursor, we're done with it. - */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - /* - * If here, we need to do a join to keep the tree balanced. - */ - ASSERT(bno != NULLAGBLOCK); - /* - * See if we can join with the left neighbor block. - */ - if (lbno != NULLAGBLOCK && - lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - /* - * Set "right" to be the starting block, - * "left" to be the left neighbor. - */ - rbno = bno; - right = block; - rrecs = be16_to_cpu(right->bb_numrecs); - rbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, lbno, 0, &lbp, - XFS_ALLOC_BTREE_REF))) - return error; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); - lrecs = be16_to_cpu(left->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - } - /* - * If that won't work, see if we can join with the right neighbor block. - */ - else if (rbno != NULLAGBLOCK && - rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - /* - * Set "left" to be the starting block, - * "right" to be the right neighbor. - */ - lbno = bno; - left = block; - lrecs = be16_to_cpu(left->bb_numrecs); - lbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, rbno, 0, &rbp, - XFS_ALLOC_BTREE_REF))) - return error; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - rrecs = be16_to_cpu(right->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - } - /* - * Otherwise, we can't fix the imbalance. - * Just return. This is probably a logic error, but it's not fatal. - */ - else { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * We're now going to join "left" and "right" by moving all the stuff - * in "right" to "left" and deleting "right". - */ - if (level > 0) { - /* - * It's a non-leaf. Move keys and pointers. - */ - lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur); - lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memcpy(lkp, rkp, rrecs * sizeof(*lkp)); - memcpy(lpp, rpp, rrecs * sizeof(*lpp)); - xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); - xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); - } else { - /* - * It's a leaf. Move records. - */ - lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memcpy(lrp, rrp, rrecs * sizeof(*lrp)); - xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); - } - /* - * If we joined with the left neighbor, set the buffer in the - * cursor to the left block, and fix up the index. - */ - if (bp != lbp) { - xfs_btree_setbuf(cur, level, lbp); - cur->bc_ptrs[level] += lrecs; - } - /* - * If we joined with the right neighbor and there's a level above - * us, increment the cursor at that level. - */ - else if (level + 1 < cur->bc_nlevels && - (error = xfs_btree_increment(cur, level + 1, &i))) - return error; - /* - * Fix up the number of records in the surviving block. - */ - lrecs += rrecs; - left->bb_numrecs = cpu_to_be16(lrecs); - /* - * Fix up the right block pointer in the surviving block, and log it. - */ - left->bb_rightsib = right->bb_rightsib; - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there is a right sibling now, make it point to the - * remaining block. - */ - if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) { - xfs_alloc_block_t *rrblock; - xfs_buf_t *rrbp; - - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, - &rrbp, XFS_ALLOC_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(lbno); - xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * Free the deleting block by putting it on the freelist. - */ - error = xfs_alloc_put_freelist(cur->bc_tp, - cur->bc_private.a.agbp, NULL, rbno, 1); - if (error) - return error; - /* - * Since blocks move to the free list without the coordination - * used in xfs_bmap_finish, we can't allow block to be available - * for reallocation and non-transaction writing (user data) - * until we know that the transaction that moved it to the free - * list is permanently on disk. We track the blocks by declaring - * these blocks as "busy"; the busy list is maintained on a - * per-ag basis and each transaction records which entries - * should be removed when the iclog commits to disk. If a - * busy block is allocated, the iclog is pushed up to the - * LSN that freed the block. - */ - xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); - xfs_trans_agbtree_delta(cur->bc_tp, -1); - - /* - * Adjust the current level's cursor so that we're left referring - * to the right node, after we're done. - * If this leaves the ptr value 0 our caller will fix it up. - */ - if (level > 0) - cur->bc_ptrs[level]--; - /* - * Return value means the next level up has something to do. - */ - *stat = 2; - return 0; - -error0: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Log header fields from a btree block. - */ -STATIC void -xfs_alloc_log_block( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer containing btree block */ - int fields) /* mask of fields: XFS_BB_... */ -{ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - static const short offsets[] = { /* table of offsets */ - offsetof(xfs_alloc_block_t, bb_magic), - offsetof(xfs_alloc_block_t, bb_level), - offsetof(xfs_alloc_block_t, bb_numrecs), - offsetof(xfs_alloc_block_t, bb_leftsib), - offsetof(xfs_alloc_block_t, bb_rightsib), - sizeof(xfs_alloc_block_t) - }; - - xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); - xfs_trans_log_buf(tp, bp, first, last); -} - -/* - * Log keys from a btree block (nonleaf). - */ -STATIC void -xfs_alloc_log_keys( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int kfirst, /* index of first key to log */ - int klast) /* index of last key to log */ -{ - xfs_alloc_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - xfs_alloc_key_t *kp; /* key pointer in btree block */ - int last; /* last byte offset logged */ - - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log block pointer fields from a btree block (nonleaf). - */ -STATIC void -xfs_alloc_log_ptrs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int pfirst, /* index of first pointer to log */ - int plast) /* index of last pointer to log */ -{ - xfs_alloc_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */ - - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log records from a btree block (leaf). - */ -STATIC void -xfs_alloc_log_recs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int rfirst, /* index of first record to log */ - int rlast) /* index of last record to log */ -{ - xfs_alloc_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_alloc_rec_t *rp; /* record pointer for btree block */ - - - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - rp = XFS_ALLOC_REC_ADDR(block, 1, cur); -#ifdef DEBUG - { - xfs_agf_t *agf; - xfs_alloc_rec_t *p; - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++) - ASSERT(be32_to_cpu(p->ar_startblock) + - be32_to_cpu(p->ar_blockcount) <= - be32_to_cpu(agf->agf_length)); - } -#endif - first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - - -/* - * Externally visible routines. - */ - -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -int /* error */ -xfs_alloc_delete( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* result code */ - int level; /* btree level */ - - /* - * Go up the tree, starting at leaf level. - * If 2 is returned then a join was done; go to the next level. - * Otherwise we are done. - */ - for (level = 0, i = 2; i == 2; level++) { - if ((error = xfs_alloc_delrec(cur, level, &i))) - return error; - } - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_btree_decrement(cur, level, &i))) - return error; - break; - } - } - } - *stat = i; - return 0; -} /* * Get the data from the pointed-to record. @@ -879,6 +194,7 @@ xfs_allocbt_update_lastrec( struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); __be32 len; + int numrecs; ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); @@ -897,6 +213,22 @@ xfs_allocbt_update_lastrec( be32_to_cpu(agf->agf_longest)) return; len = rec->alloc.ar_blockcount; + break; + case LASTREC_DELREC: + numrecs = xfs_btree_get_numrecs(block); + if (ptr <= numrecs) + return; + ASSERT(ptr == numrecs + 1); + + if (numrecs) { + xfs_alloc_rec_t *rrp; + + rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); + len = rrp->ar_blockcount; + } else { + len = 0; + } + break; default: ASSERT(0); @@ -908,6 +240,14 @@ xfs_allocbt_update_lastrec( xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); } +STATIC int +xfs_allocbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_alloc_mnr[level != 0]; +} + STATIC int xfs_allocbt_get_maxrecs( struct xfs_btree_cur *cur, @@ -983,6 +323,38 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } +STATIC int +xfs_allocbt_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + xfs_allocbt_set_root(cur, newroot, -1); + error = xfs_allocbt_free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + xfs_btree_setbuf(cur, level, NULL); + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -1055,9 +427,11 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .set_root = xfs_allocbt_set_root, + .kill_root = xfs_allocbt_kill_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, + .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, .init_rec_from_key = xfs_allocbt_init_rec_from_key, diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 2e340ef8025..8d2e3ec21fd 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -94,13 +94,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat); - /* * Get the data from the pointed-to record. */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 85e2e8b9cf4..74761ca2c63 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -864,7 +864,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -1425,13 +1425,13 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -1474,7 +1474,7 @@ xfs_bmap_add_extent_unwritten_real( &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -1517,7 +1517,7 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -2152,7 +2152,7 @@ xfs_bmap_add_extent_hole_real( right.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -3216,7 +3216,7 @@ xfs_bmap_del_extent( flags |= XFS_ILOG_FEXT(whichfork); break; } - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); break; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 6b7774ebc26..5b8030561d7 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -44,14 +44,6 @@ #include "xfs_error.h" #include "xfs_quota.h" -/* - * Prototypes for internal btree functions. - */ - - -STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); - #undef EXIT #define ENTRY XBT_ENTRY @@ -80,453 +72,6 @@ STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); #define XFS_BMBT_TRACE_CURSOR(c,s) \ XFS_BTREE_TRACE_CURSOR(c,s) - -/* - * Internal functions. - */ - -/* - * Delete record pointed to by cur/level. - */ -STATIC int /* error */ -xfs_bmbt_delrec( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block; /* bmap btree block */ - xfs_fsblock_t bno; /* fs-relative block number */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop counter */ - int j; /* temp state */ - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ - xfs_fsblock_t lbno; /* left sibling block number */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - int lrecs=0; /* left record count */ - xfs_bmbt_rec_t *lrp; /* left record pointer */ - xfs_mount_t *mp; /* file system mount point */ - xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ - int ptr; /* key/record index */ - xfs_fsblock_t rbno; /* right sibling block number */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp; /* right btree key */ - xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */ - xfs_bmbt_ptr_t *rpp; /* right address pointer */ - xfs_bmbt_block_t *rrblock; /* right-right btree block */ - xfs_buf_t *rrbp; /* right-right buffer pointer */ - int rrecs=0; /* right record count */ - xfs_bmbt_rec_t *rrp; /* right record pointer */ - xfs_btree_cur_t *tcur; /* temporary btree cursor */ - int numrecs; /* temporary numrec count */ - int numlrecs, numrrecs; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - ptr = cur->bc_ptrs[level]; - tcur = NULL; - if (ptr == 0) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - block = xfs_bmbt_get_block(cur, level, &bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } -#endif - if (ptr > numrecs) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_bmbt_delrec); - if (level > 0) { - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); -#ifdef DEBUG - for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - } -#endif - if (ptr < numrecs) { - memmove(&kp[ptr - 1], &kp[ptr], - (numrecs - ptr) * sizeof(*kp)); - memmove(&pp[ptr - 1], &pp[ptr], - (numrecs - ptr) * sizeof(*pp)); - xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1); - xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1); - } - } else { - rp = XFS_BMAP_REC_IADDR(block, 1, cur); - if (ptr < numrecs) { - memmove(&rp[ptr - 1], &rp[ptr], - (numrecs - ptr) * sizeof(*rp)); - xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1); - } - if (ptr == 1) { - key.br_startoff = - cpu_to_be64(xfs_bmbt_disk_get_startoff(rp)); - kp = &key; - } - } - numrecs--; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS); - /* - * We're at the root level. - * First, shrink the root block in-memory. - * Try to get rid of the next level down. - * If we can't then there's nothing left to do. - */ - if (level == cur->bc_nlevels - 1) { - xfs_iroot_realloc(cur->bc_private.b.ip, -1, - cur->bc_private.b.whichfork); - if ((error = xfs_btree_kill_iroot(cur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - rbno = be64_to_cpu(block->bb_rightsib); - lbno = be64_to_cpu(block->bb_leftsib); - /* - * One child of root, need to get a chance to copy its contents - * into the root and delete it. Can't go up to next level, - * there's nothing to delete there. - */ - if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK && - level == cur->bc_nlevels - 2) { - if ((error = xfs_btree_kill_iroot(cur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK); - if ((error = xfs_btree_dup_cursor(cur, &tcur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - bno = NULLFSBLOCK; - if (rbno != NULLFSBLOCK) { - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - rbp = tcur->bc_bufs[level]; - right = XFS_BUF_TO_BMBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } -#endif - bno = be64_to_cpu(right->bb_leftsib); - if (be16_to_cpu(right->bb_numrecs) - 1 >= - XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_btree_lshift(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_BMAP_BLOCK_IMINRECS(level, tcur)); - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - if (level > 0) { - if ((error = xfs_btree_decrement(cur, - level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, - ERROR); - goto error0; - } - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - } - rrecs = be16_to_cpu(right->bb_numrecs); - if (lbno != NULLFSBLOCK) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_decrement(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - } - if (lbno != NULLFSBLOCK) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * decrement to last in block - */ - if ((error = xfs_btree_decrement(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - lbp = tcur->bc_bufs[level]; - left = XFS_BUF_TO_BMBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } -#endif - bno = be64_to_cpu(left->bb_rightsib); - if (be16_to_cpu(left->bb_numrecs) - 1 >= - XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_btree_rshift(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_BMAP_BLOCK_IMINRECS(level, tcur)); - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - if (level == 0) - cur->bc_ptrs[0]++; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - } - lrecs = be16_to_cpu(left->bb_numrecs); - } - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - mp = cur->bc_mp; - ASSERT(bno != NULLFSBLOCK); - if (lbno != NULLFSBLOCK && - lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - rbno = bno; - right = block; - rbp = bp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - left = XFS_BUF_TO_BMBT_BLOCK(lbp); - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - } else if (rbno != NULLFSBLOCK && - rrecs + be16_to_cpu(block->bb_numrecs) <= - XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - lbno = bno; - left = block; - lbp = bp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - right = XFS_BUF_TO_BMBT_BLOCK(rbp); - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - lrecs = be16_to_cpu(left->bb_numrecs); - } else { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - numlrecs = be16_to_cpu(left->bb_numrecs); - numrrecs = be16_to_cpu(right->bb_numrecs); - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur); - lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < numrrecs; i++) { - if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - } -#endif - memcpy(lkp, rkp, numrrecs * sizeof(*lkp)); - memcpy(lpp, rpp, numrrecs * sizeof(*lpp)); - xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs); - xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs); - } else { - lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - memcpy(lrp, rrp, numrrecs * sizeof(*lrp)); - xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs); - } - be16_add_cpu(&left->bb_numrecs, numrrecs); - left->bb_rightsib = right->bb_rightsib; - xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS); - if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) { - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, - be64_to_cpu(left->bb_rightsib), - 0, &rrbp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp); - if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - rrblock->bb_leftsib = cpu_to_be64(lbno); - xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB); - } - xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1, - cur->bc_private.b.flist, mp); - cur->bc_private.b.ip->i_d.di_nblocks--; - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(cur->bc_tp, rbp); - if (bp != lbp) { - cur->bc_bufs[level] = lbp; - cur->bc_ptrs[level] += lrecs; - cur->bc_ra[level] = 0; - } else if ((error = xfs_btree_increment(cur, level + 1, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (level > 0) - cur->bc_ptrs[level]--; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 2; - return 0; - -error0: - if (tcur) - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Log key values from the btree block. - */ -STATIC void -xfs_bmbt_log_keys( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int kfirst, - int klast) -{ - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast); - tp = cur->bc_tp; - if (bp) { - xfs_bmbt_block_t *block; - int first; - xfs_bmbt_key_t *kp; - int last; - - block = XFS_BUF_TO_BMBT_BLOCK(bp); - kp = XFS_BMAP_KEY_DADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(tp, bp, first, last); - } else { - xfs_inode_t *ip; - - ip = cur->bc_private.b.ip; - xfs_trans_log_inode(tp, ip, - XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} - -/* - * Log pointer values from the btree block. - */ -STATIC void -xfs_bmbt_log_ptrs( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int pfirst, - int plast) -{ - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast); - tp = cur->bc_tp; - if (bp) { - xfs_bmbt_block_t *block; - int first; - int last; - xfs_bmbt_ptr_t *pp; - - block = XFS_BUF_TO_BMBT_BLOCK(bp); - pp = XFS_BMAP_PTR_DADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(tp, bp, first, last); - } else { - xfs_inode_t *ip; - - ip = cur->bc_private.b.ip; - xfs_trans_log_inode(tp, ip, - XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} - /* * Determine the extent state. */ @@ -575,42 +120,6 @@ xfs_bmdr_to_bmbt( memcpy(tpp, fpp, sizeof(*fpp) * dmxr); } -/* - * Delete the record pointed to by cur. - */ -int /* error */ -xfs_bmbt_delete( - xfs_btree_cur_t *cur, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; - int level; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - for (level = 0, i = 2; i == 2; level++) { - if ((error = xfs_bmbt_delrec(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_btree_decrement(cur, level, - &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - break; - } - } - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = i; - return 0; -} - /* * Convert a compressed bmap extent record to an uncompressed form. * This code must be in sync with the routines xfs_bmbt_get_startoff, @@ -664,31 +173,6 @@ xfs_bmbt_get_all( __xfs_bmbt_get_all(r->l0, r->l1, s); } -/* - * Get the block pointer for the given level of the cursor. - * Fill in the buffer pointer, if applicable. - */ -xfs_bmbt_block_t * -xfs_bmbt_get_block( - xfs_btree_cur_t *cur, - int level, - xfs_buf_t **bpp) -{ - xfs_ifork_t *ifp; - xfs_bmbt_block_t *rval; - - if (level < cur->bc_nlevels - 1) { - *bpp = cur->bc_bufs[level]; - rval = XFS_BUF_TO_BMBT_BLOCK(*bpp); - } else { - *bpp = NULL; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); - rval = ifp->if_broot; - } - return rval; -} - /* * Extract the blockcount field from an in memory bmap extent record. */ @@ -1225,6 +709,14 @@ xfs_bmbt_free_block( return 0; } +STATIC int +xfs_bmbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return XFS_BMAP_BLOCK_IMINRECS(level, cur); +} + STATIC int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, @@ -1389,6 +881,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .alloc_block = xfs_bmbt_alloc_block, .free_block = xfs_bmbt_free_block, .get_maxrecs = xfs_bmbt_get_maxrecs, + .get_minrecs = xfs_bmbt_get_minrecs, .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, .init_rec_from_key = xfs_bmbt_init_rec_from_key, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 703fe2e3434..952ab395f79 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -237,10 +237,7 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; * Prototypes for xfs_bmap.c to call. */ extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); -extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); -extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur, - int, struct xfs_buf **bpp); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 75a8a7b00df..28cc7681834 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -3171,3 +3171,596 @@ out0: XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } + +STATIC int +xfs_btree_dec_cursor( + struct xfs_btree_cur *cur, + int level, + int *stat) +{ + int error; + int i; + + if (level > 0) { + error = xfs_btree_decrement(cur, level, &i); + if (error) + return error; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +} + +/* + * Single level of the btree record deletion routine. + * Delete record pointed to by cur/level. + * Remove the record from its block then rebalance the tree. + * Return 0 for error, 1 for done, 2 to go on to the next level. + */ +STATIC int /* error */ +xfs_btree_delrec( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level removing record from */ + int *stat) /* fail/done/go-on */ +{ + struct xfs_btree_block *block; /* btree block */ + union xfs_btree_ptr cptr; /* current block ptr */ + struct xfs_buf *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop counter */ + union xfs_btree_key key; /* storage for keyp */ + union xfs_btree_key *keyp = &key; /* passed to the next level */ + union xfs_btree_ptr lptr; /* left sibling block ptr */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + int lrecs = 0; /* left record count */ + int ptr; /* key/record index */ + union xfs_btree_ptr rptr; /* right sibling block ptr */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + struct xfs_btree_block *rrblock; /* right-right btree block */ + struct xfs_buf *rrbp; /* right-right buffer pointer */ + int rrecs = 0; /* right record count */ + struct xfs_btree_cur *tcur; /* temporary btree cursor */ + int numrecs; /* temporary numrec count */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + tcur = NULL; + + /* Get the index of the entry being deleted, check for nothing there. */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Get the buffer & block containing the record or key/ptr. */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* Fail if we're off the end of the block. */ + if (ptr > numrecs) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + XFS_BTREE_STATS_INC(cur, delrec); + XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); + + /* Excise the entries being deleted. */ + if (level > 0) { + /* It's a nonleaf. operate on keys and ptrs */ + union xfs_btree_key *lkp; + union xfs_btree_ptr *lpp; + + lkp = xfs_btree_key_addr(cur, ptr + 1, block); + lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); + +#ifdef DEBUG + for (i = 0; i < numrecs - ptr; i++) { + error = xfs_btree_check_ptr(cur, lpp, i, level); + if (error) + goto error0; + } +#endif + + if (ptr < numrecs) { + xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); + xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); + xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); + xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); + } + + /* + * If it's the first record in the block, we'll need to pass a + * key up to the next level (updkey). + */ + if (ptr == 1) + keyp = xfs_btree_key_addr(cur, 1, block); + } else { + /* It's a leaf. operate on records */ + if (ptr < numrecs) { + xfs_btree_shift_recs(cur, + xfs_btree_rec_addr(cur, ptr + 1, block), + -1, numrecs - ptr); + xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); + } + + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + if (ptr == 1) { + cur->bc_ops->init_key_from_rec(&key, + xfs_btree_rec_addr(cur, 1, block)); + keyp = &key; + } + } + + /* + * Decrement and log the number of entries in the block. + */ + xfs_btree_set_numrecs(block, --numrecs); + xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, level)) { + cur->bc_ops->update_lastrec(cur, block, NULL, + ptr, LASTREC_DELREC); + } + + /* + * We're at the root level. First, shrink the root block in-memory. + * Try to get rid of the next level down. If we can't then there's + * nothing left to do. + */ + if (level == cur->bc_nlevels - 1) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + xfs_iroot_realloc(cur->bc_private.b.ip, -1, + cur->bc_private.b.whichfork); + + error = xfs_btree_kill_iroot(cur); + if (error) + goto error0; + + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + *stat = 1; + return 0; + } + + /* + * If this is the root level, and there's only one entry left, + * and it's NOT the leaf level, then we can get rid of this + * level. + */ + if (numrecs == 1 && level > 0) { + union xfs_btree_ptr *pp; + /* + * pp is still set to the first pointer in the block. + * Make it the new root of the btree. + */ + pp = xfs_btree_ptr_addr(cur, 1, block); + error = cur->bc_ops->kill_root(cur, bp, level, pp); + if (error) + goto error0; + } else if (level > 0) { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + } + *stat = 1; + return 0; + } + + /* + * If we deleted the leftmost entry in the block, update the + * key values above us in the tree. + */ + if (ptr == 1) { + error = xfs_btree_updkey(cur, keyp, level + 1); + if (error) + goto error0; + } + + /* + * If the number of records remaining in the block is at least + * the minimum, we're done. + */ + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + + /* + * Otherwise, we have to move some records around to keep the + * tree balanced. Look at the left and right sibling blocks to + * see if we can re-balance by moving only one record. + */ + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + /* + * One child of root, need to get a chance to copy its contents + * into the root and delete it. Can't go up to next level, + * there's nothing to delete there. + */ + if (xfs_btree_ptr_is_null(cur, &rptr) && + xfs_btree_ptr_is_null(cur, &lptr) && + level == cur->bc_nlevels - 2) { + error = xfs_btree_kill_iroot(cur); + if (!error) + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + } + + ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || + !xfs_btree_ptr_is_null(cur, &lptr)); + + /* + * Duplicate the cursor so our btree manipulations here won't + * disrupt the next level up. + */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + + /* + * If there's a right sibling, see if it's ok to shift an entry + * out of it. + */ + if (!xfs_btree_ptr_is_null(cur, &rptr)) { + /* + * Move the temp cursor to the last entry in the next block. + * Actually any entry but the first would suffice. + */ + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_increment(tcur, level, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + /* Grab a pointer to the block. */ + right = xfs_btree_get_block(tcur, level, &rbp); +#ifdef DEBUG + error = xfs_btree_check_block(tcur, right, level, rbp); + if (error) + goto error0; +#endif + /* Grab the current block number, for future use. */ + xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); + + /* + * If right block is full enough so that removing one entry + * won't make it too empty, and left-shifting an entry out + * of right to us works, we're done. + */ + if (xfs_btree_get_numrecs(right) - 1 >= + cur->bc_ops->get_minrecs(tcur, level)) { + error = xfs_btree_lshift(tcur, level, &i); + if (error) + goto error0; + if (i) { + ASSERT(xfs_btree_get_numrecs(block) >= + cur->bc_ops->get_minrecs(tcur, level)); + + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + } + + /* + * Otherwise, grab the number of records in right for + * future reference, and fix up the temp cursor to point + * to our block again (last record). + */ + rrecs = xfs_btree_get_numrecs(right); + if (!xfs_btree_ptr_is_null(cur, &lptr)) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_decrement(tcur, level, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + + /* + * If there's a left sibling, see if it's ok to shift an entry + * out of it. + */ + if (!xfs_btree_ptr_is_null(cur, &lptr)) { + /* + * Move the temp cursor to the first entry in the + * previous block. + */ + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_decrement(tcur, level, &i); + if (error) + goto error0; + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + /* Grab a pointer to the block. */ + left = xfs_btree_get_block(tcur, level, &lbp); +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + /* Grab the current block number, for future use. */ + xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); + + /* + * If left block is full enough so that removing one entry + * won't make it too empty, and right-shifting an entry out + * of left to us works, we're done. + */ + if (xfs_btree_get_numrecs(left) - 1 >= + cur->bc_ops->get_minrecs(tcur, level)) { + error = xfs_btree_rshift(tcur, level, &i); + if (error) + goto error0; + if (i) { + ASSERT(xfs_btree_get_numrecs(block) >= + cur->bc_ops->get_minrecs(tcur, level)); + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + if (level == 0) + cur->bc_ptrs[0]++; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + } + } + + /* + * Otherwise, grab the number of records in right for + * future reference. + */ + lrecs = xfs_btree_get_numrecs(left); + } + + /* Delete the temp cursor, we're done with it. */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + + /* If here, we need to do a join to keep the tree balanced. */ + ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); + + if (!xfs_btree_ptr_is_null(cur, &lptr) && + lrecs + xfs_btree_get_numrecs(block) <= + cur->bc_ops->get_maxrecs(cur, level)) { + /* + * Set "right" to be the starting block, + * "left" to be the left neighbor. + */ + rptr = cptr; + right = block; + rbp = bp; + error = xfs_btree_read_buf_block(cur, &lptr, level, + 0, &left, &lbp); + if (error) + goto error0; + + /* + * If that won't work, see if we can join with the right neighbor block. + */ + } else if (!xfs_btree_ptr_is_null(cur, &rptr) && + rrecs + xfs_btree_get_numrecs(block) <= + cur->bc_ops->get_maxrecs(cur, level)) { + /* + * Set "left" to be the starting block, + * "right" to be the right neighbor. + */ + lptr = cptr; + left = block; + lbp = bp; + error = xfs_btree_read_buf_block(cur, &rptr, level, + 0, &right, &rbp); + if (error) + goto error0; + + /* + * Otherwise, we can't fix the imbalance. + * Just return. This is probably a logic error, but it's not fatal. + */ + } else { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + + rrecs = xfs_btree_get_numrecs(right); + lrecs = xfs_btree_get_numrecs(left); + + /* + * We're now going to join "left" and "right" by moving all the stuff + * in "right" to "left" and deleting "right". + */ + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + union xfs_btree_key *rkp; /* right btree key */ + union xfs_btree_ptr *rpp; /* right address pointer */ + + lkp = xfs_btree_key_addr(cur, lrecs + 1, left); + lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); +#ifdef DEBUG + for (i = 1; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, rpp, i, level); + if (error) + goto error0; + } +#endif + xfs_btree_copy_keys(cur, lkp, rkp, rrecs); + xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); + + xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); + xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + union xfs_btree_rec *rrp; /* right record pointer */ + + lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, lrp, rrp, rrecs); + xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); + } + + XFS_BTREE_STATS_INC(cur, join); + + /* + * Fix up the the number of records and right block pointer in the + * surviving block, and log it. + */ + xfs_btree_set_numrecs(left, lrecs + rrecs); + xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), + xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + + /* If there is a right sibling, point it to the remaining block. */ + xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &cptr)) { + error = xfs_btree_read_buf_block(cur, &cptr, level, + 0, &rrblock, &rrbp); + if (error) + goto error0; + xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); + xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + + /* Free the deleted block. */ + error = cur->bc_ops->free_block(cur, rbp); + if (error) + goto error0; + XFS_BTREE_STATS_INC(cur, free); + + /* + * If we joined with the left neighbor, set the buffer in the + * cursor to the left block, and fix up the index. + */ + if (bp != lbp) { + cur->bc_bufs[level] = lbp; + cur->bc_ptrs[level] += lrecs; + cur->bc_ra[level] = 0; + } + /* + * If we joined with the right neighbor and there's a level above + * us, increment the cursor at that level. + */ + else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || + (level + 1 < cur->bc_nlevels)) { + error = xfs_btree_increment(cur, level + 1, &i); + if (error) + goto error0; + } + + /* + * Readjust the ptr at this level if it's not a leaf, since it's + * still pointing at the deletion point, which makes the cursor + * inconsistent. If this makes the ptr 0, the caller fixes it up. + * We can't use decrement because it would change the next level up. + */ + if (level > 0) + cur->bc_ptrs[level]--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + /* Return value means the next level up has something to do. */ + *stat = 2; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (tcur) + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Delete the record pointed to by cur. + * The cursor refers to the place where the record was (could be inserted) + * when the operation returns. + */ +int /* error */ +xfs_btree_delete( + struct xfs_btree_cur *cur, + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int level; + int i; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + /* + * Go up the tree, starting at leaf level. + * + * If 2 is returned then a join was done; go to the next level. + * Otherwise we are done. + */ + for (level = 0, i = 2; i == 2; level++) { + error = xfs_btree_delrec(cur, level, &i); + if (error) + goto error0; + } + + if (i == 0) { + for (level = 1; level < cur->bc_nlevels; level++) { + if (cur->bc_ptrs[level] == 0) { + error = xfs_btree_decrement(cur, level, &i); + if (error) + goto error0; + break; + } + } + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = i; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index ff2552febba..06ef792e0aa 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -192,6 +192,8 @@ struct xfs_btree_ops { /* update btree root pointer */ void (*set_root)(struct xfs_btree_cur *cur, union xfs_btree_ptr *nptr, int level_change); + int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp, + int level, union xfs_btree_ptr *newroot); /* block allocation / freeing */ int (*alloc_block)(struct xfs_btree_cur *cur, @@ -207,6 +209,7 @@ struct xfs_btree_ops { int ptr, int reason); /* records in block/level */ + int (*get_minrecs)(struct xfs_btree_cur *cur, int level); int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); /* records on disk. Matter for the root in inode case. */ @@ -251,6 +254,7 @@ struct xfs_btree_ops { */ #define LASTREC_UPDATE 0 #define LASTREC_INSREC 1 +#define LASTREC_DELREC 2 /* @@ -562,6 +566,7 @@ int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); +int xfs_btree_delete(struct xfs_btree_cur *, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index b68e73bb17c..f13f59b13cc 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1168,8 +1168,8 @@ xfs_difree( xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); - if ((error = xfs_inobt_delete(cur, &i))) { - cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", + if ((error = xfs_btree_delete(cur, &i))) { + cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", error, mp->m_fsname); goto error0; } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 90f1d4ee772..6c0a07d1fed 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -40,611 +40,6 @@ #include "xfs_alloc.h" #include "xfs_error.h" -STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int); -STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); - -/* - * Single level of the xfs_inobt_delete record deletion routine. - * Delete record pointed to by cur/level. - * Remove the record from its block then rebalance the tree. - * Return 0 for error, 1 for done, 2 to go on to the next level. - */ -STATIC int /* error */ -xfs_inobt_delrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level removing record from */ - int *stat) /* fail/done/go-on */ -{ - xfs_buf_t *agbp; /* buffer for a.g. inode header */ - xfs_mount_t *mp; /* mount structure */ - xfs_agi_t *agi; /* allocation group inode header */ - xfs_inobt_block_t *block; /* btree block record/key lives in */ - xfs_agblock_t bno; /* btree block number */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_inobt_key_t key; /* kp points here if block is level 0 */ - xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */ - xfs_agblock_t lbno; /* left block's block number */ - xfs_buf_t *lbp; /* left block's buffer pointer */ - xfs_inobt_block_t *left; /* left btree block */ - xfs_inobt_key_t *lkp; /* left block key pointer */ - xfs_inobt_ptr_t *lpp; /* left block address pointer */ - int lrecs = 0; /* number of records in left block */ - xfs_inobt_rec_t *lrp; /* left block record pointer */ - xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */ - int ptr; /* index in btree block for this rec */ - xfs_agblock_t rbno; /* right block's block number */ - xfs_buf_t *rbp; /* right block's buffer pointer */ - xfs_inobt_block_t *right; /* right btree block */ - xfs_inobt_key_t *rkp; /* right block key pointer */ - xfs_inobt_rec_t *rp; /* pointer to btree records */ - xfs_inobt_ptr_t *rpp; /* right block address pointer */ - int rrecs = 0; /* number of records in right block */ - int numrecs; - xfs_inobt_rec_t *rrp; /* right block record pointer */ - xfs_btree_cur_t *tcur; /* temporary btree cursor */ - - mp = cur->bc_mp; - - /* - * Get the index of the entry being deleted, check for nothing there. - */ - ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - *stat = 0; - return 0; - } - - /* - * Get the buffer & block containing the record or key/ptr. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Fail if we're off the end of the block. - */ - - numrecs = be16_to_cpu(block->bb_numrecs); - if (ptr > numrecs) { - *stat = 0; - return 0; - } - /* - * It's a nonleaf. Excise the key and ptr being deleted, by - * sliding the entries past them down one. - * Log the changed areas of the block. - */ - if (level > 0) { - kp = XFS_INOBT_KEY_ADDR(block, 1, cur); - pp = XFS_INOBT_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level))) - return error; - } -#endif - if (ptr < numrecs) { - memmove(&kp[ptr - 1], &kp[ptr], - (numrecs - ptr) * sizeof(*kp)); - memmove(&pp[ptr - 1], &pp[ptr], - (numrecs - ptr) * sizeof(*kp)); - xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1); - xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1); - } - } - /* - * It's a leaf. Excise the record being deleted, by sliding the - * entries past it down one. Log the changed areas of the block. - */ - else { - rp = XFS_INOBT_REC_ADDR(block, 1, cur); - if (ptr < numrecs) { - memmove(&rp[ptr - 1], &rp[ptr], - (numrecs - ptr) * sizeof(*rp)); - xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1); - } - /* - * If it's the first record in the block, we'll need a key - * structure to pass up to the next level (updkey). - */ - if (ptr == 1) { - key.ir_startino = rp->ir_startino; - kp = &key; - } - } - /* - * Decrement and log the number of entries in the block. - */ - numrecs--; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); - /* - * Is this the root level? If so, we're almost done. - */ - if (level == cur->bc_nlevels - 1) { - /* - * If this is the root level, - * and there's only one entry left, - * and it's NOT the leaf level, - * then we can get rid of this level. - */ - if (numrecs == 1 && level > 0) { - agbp = cur->bc_private.a.agbp; - agi = XFS_BUF_TO_AGI(agbp); - /* - * pp is still set to the first pointer in the block. - * Make it the new root of the btree. - */ - bno = be32_to_cpu(agi->agi_root); - agi->agi_root = *pp; - be32_add_cpu(&agi->agi_level, -1); - /* - * Free the block. - */ - if ((error = xfs_free_extent(cur->bc_tp, - XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1))) - return error; - xfs_trans_binval(cur->bc_tp, bp); - xfs_ialloc_log_agi(cur->bc_tp, agbp, - XFS_AGI_ROOT | XFS_AGI_LEVEL); - /* - * Update the cursor so there's one fewer level. - */ - cur->bc_bufs[level] = NULL; - cur->bc_nlevels--; - } else if (level > 0 && - (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * If we deleted the leftmost entry in the block, update the - * key values above us in the tree. - */ - if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) - return error; - /* - * If the number of records remaining in the block is at least - * the minimum, we're done. - */ - if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if (level > 0 && - (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * Otherwise, we have to move some records around to keep the - * tree balanced. Look at the left and right sibling blocks to - * see if we can re-balance by moving only one record. - */ - rbno = be32_to_cpu(block->bb_rightsib); - lbno = be32_to_cpu(block->bb_leftsib); - bno = NULLAGBLOCK; - ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK); - /* - * Duplicate the cursor so our btree manipulations here won't - * disrupt the next level up. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - /* - * If there's a right sibling, see if it's ok to shift an entry - * out of it. - */ - if (rbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the last entry in the next block. - * Actually any entry but the first would suffice. - */ - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Grab a pointer to the block. - */ - rbp = tcur->bc_bufs[level]; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(right->bb_leftsib); - /* - * If right block is full enough so that removing one entry - * won't make it too empty, and left-shifting an entry out - * of right to us works, we're done. - */ - if (be16_to_cpu(right->bb_numrecs) - 1 >= - XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_lshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_INOBT_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level > 0 && - (error = xfs_btree_decrement(cur, level, - &i))) - return error; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference, and fix up the temp cursor to point - * to our block again (last record). - */ - rrecs = be16_to_cpu(right->bb_numrecs); - if (lbno != NULLAGBLOCK) { - xfs_btree_firstrec(tcur, level); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - } - } - /* - * If there's a left sibling, see if it's ok to shift an entry - * out of it. - */ - if (lbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the first entry in the - * previous block. - */ - xfs_btree_firstrec(tcur, level); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - xfs_btree_firstrec(tcur, level); - /* - * Grab a pointer to the block. - */ - lbp = tcur->bc_bufs[level]; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(left->bb_rightsib); - /* - * If left block is full enough so that removing one entry - * won't make it too empty, and right-shifting an entry out - * of left to us works, we're done. - */ - if (be16_to_cpu(left->bb_numrecs) - 1 >= - XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_rshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_INOBT_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level == 0) - cur->bc_ptrs[0]++; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference. - */ - lrecs = be16_to_cpu(left->bb_numrecs); - } - /* - * Delete the temp cursor, we're done with it. - */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - /* - * If here, we need to do a join to keep the tree balanced. - */ - ASSERT(bno != NULLAGBLOCK); - /* - * See if we can join with the left neighbor block. - */ - if (lbno != NULLAGBLOCK && - lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - /* - * Set "right" to be the starting block, - * "left" to be the left neighbor. - */ - rbno = bno; - right = block; - rrecs = be16_to_cpu(right->bb_numrecs); - rbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, lbno, 0, &lbp, - XFS_INO_BTREE_REF))) - return error; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); - lrecs = be16_to_cpu(left->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - } - /* - * If that won't work, see if we can join with the right neighbor block. - */ - else if (rbno != NULLAGBLOCK && - rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - /* - * Set "left" to be the starting block, - * "right" to be the right neighbor. - */ - lbno = bno; - left = block; - lrecs = be16_to_cpu(left->bb_numrecs); - lbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, rbno, 0, &rbp, - XFS_INO_BTREE_REF))) - return error; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - rrecs = be16_to_cpu(right->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - } - /* - * Otherwise, we can't fix the imbalance. - * Just return. This is probably a logic error, but it's not fatal. - */ - else { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * We're now going to join "left" and "right" by moving all the stuff - * in "right" to "left" and deleting "right". - */ - if (level > 0) { - /* - * It's a non-leaf. Move keys and pointers. - */ - lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur); - lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memcpy(lkp, rkp, rrecs * sizeof(*lkp)); - memcpy(lpp, rpp, rrecs * sizeof(*lpp)); - xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); - xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); - } else { - /* - * It's a leaf. Move records. - */ - lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - memcpy(lrp, rrp, rrecs * sizeof(*lrp)); - xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); - } - /* - * If we joined with the left neighbor, set the buffer in the - * cursor to the left block, and fix up the index. - */ - if (bp != lbp) { - xfs_btree_setbuf(cur, level, lbp); - cur->bc_ptrs[level] += lrecs; - } - /* - * If we joined with the right neighbor and there's a level above - * us, increment the cursor at that level. - */ - else if (level + 1 < cur->bc_nlevels && - (error = xfs_btree_increment(cur, level + 1, &i))) - return error; - /* - * Fix up the number of records in the surviving block. - */ - lrecs += rrecs; - left->bb_numrecs = cpu_to_be16(lrecs); - /* - * Fix up the right block pointer in the surviving block, and log it. - */ - left->bb_rightsib = right->bb_rightsib; - xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there is a right sibling now, make it point to the - * remaining block. - */ - if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) { - xfs_inobt_block_t *rrblock; - xfs_buf_t *rrbp; - - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, - &rrbp, XFS_INO_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(lbno); - xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * Free the deleting block. - */ - if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, - cur->bc_private.a.agno, rbno), 1))) - return error; - xfs_trans_binval(cur->bc_tp, rbp); - /* - * Readjust the ptr at this level if it's not a leaf, since it's - * still pointing at the deletion point, which makes the cursor - * inconsistent. If this makes the ptr 0, the caller fixes it up. - * We can't use decrement because it would change the next level up. - */ - if (level > 0) - cur->bc_ptrs[level]--; - /* - * Return value means the next level up has something to do. - */ - *stat = 2; - return 0; - -error0: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Log header fields from a btree block. - */ -STATIC void -xfs_inobt_log_block( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer containing btree block */ - int fields) /* mask of fields: XFS_BB_... */ -{ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - static const short offsets[] = { /* table of offsets */ - offsetof(xfs_inobt_block_t, bb_magic), - offsetof(xfs_inobt_block_t, bb_level), - offsetof(xfs_inobt_block_t, bb_numrecs), - offsetof(xfs_inobt_block_t, bb_leftsib), - offsetof(xfs_inobt_block_t, bb_rightsib), - sizeof(xfs_inobt_block_t) - }; - - xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); - xfs_trans_log_buf(tp, bp, first, last); -} - -/* - * Log keys from a btree block (nonleaf). - */ -STATIC void -xfs_inobt_log_keys( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int kfirst, /* index of first key to log */ - int klast) /* index of last key to log */ -{ - xfs_inobt_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - xfs_inobt_key_t *kp; /* key pointer in btree block */ - int last; /* last byte offset logged */ - - block = XFS_BUF_TO_INOBT_BLOCK(bp); - kp = XFS_INOBT_KEY_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log block pointer fields from a btree block (nonleaf). - */ -STATIC void -xfs_inobt_log_ptrs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int pfirst, /* index of first pointer to log */ - int plast) /* index of last pointer to log */ -{ - xfs_inobt_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_inobt_ptr_t *pp; /* block-pointer pointer in btree blk */ - - block = XFS_BUF_TO_INOBT_BLOCK(bp); - pp = XFS_INOBT_PTR_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log records from a btree block (leaf). - */ -STATIC void -xfs_inobt_log_recs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int rfirst, /* index of first record to log */ - int rlast) /* index of last record to log */ -{ - xfs_inobt_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_inobt_rec_t *rp; /* record pointer for btree block */ - - block = XFS_BUF_TO_INOBT_BLOCK(bp); - rp = XFS_INOBT_REC_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - - -/* - * Externally visible routines. - */ - -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -int /* error */ -xfs_inobt_delete( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; - int i; /* result code */ - int level; /* btree level */ - - /* - * Go up the tree, starting at leaf level. - * If 2 is returned then a join was done; go to the next level. - * Otherwise we are done. - */ - for (level = 0, i = 2; i == 2; level++) { - if ((error = xfs_inobt_delrec(cur, level, &i))) - return error; - } - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_btree_decrement(cur, level, &i))) - return error; - break; - } - } - } - *stat = i; - return 0; -} - /* * Get the data from the pointed-to record. @@ -690,6 +85,13 @@ xfs_inobt_get_rec( return 0; } +STATIC int +xfs_inobt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_inobt_mnr[level != 0]; +} STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( @@ -829,6 +231,38 @@ xfs_inobt_key_diff( cur->bc_rec.i.ir_startino; } +STATIC int +xfs_inobt_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + xfs_inobt_set_root(cur, newroot, -1); + error = xfs_inobt_free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level] = NULL; + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -901,8 +335,10 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, + .kill_root = xfs_inobt_kill_root, .alloc_block = xfs_inobt_alloc_block, .free_block = xfs_inobt_free_block, + .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, .init_rec_from_key = xfs_inobt_init_rec_from_key, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index c9cbc4f2168..3eff3b6e5fa 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -116,13 +116,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \ i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat); - /* * Get the data from the pointed-to record. */ -- cgit v1.2.3 From 8cc938fe4237e50bea4aa557ed53b06de2319d49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:11 +1100 Subject: [XFS] implement generic xfs_btree_get_rec Not really much reason to make it generic given that it's so small, but this is the last non-method in xfs_alloc_btree.c and xfs_ialloc_btree.c, so it makes the whole btree implementation more structured. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32206a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc.c | 21 +++++++++++++++++++++ fs/xfs/xfs_alloc_btree.c | 44 -------------------------------------------- fs/xfs/xfs_alloc_btree.h | 6 ------ fs/xfs/xfs_btree.c | 41 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree.h | 1 + fs/xfs/xfs_ialloc.c | 23 +++++++++++++++++++++++ fs/xfs/xfs_ialloc.h | 5 +++++ fs/xfs/xfs_ialloc_btree.c | 44 -------------------------------------------- fs/xfs/xfs_ialloc_btree.h | 7 ------- 9 files changed, 91 insertions(+), 101 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index e9c70249d2c..54fa69e2776 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -154,6 +154,27 @@ xfs_alloc_update( return xfs_btree_update(cur, &rec); } +/* + * Get the data from the pointed-to record. + */ +STATIC int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + *bno = be32_to_cpu(rec->alloc.ar_startblock); + *len = be32_to_cpu(rec->alloc.ar_blockcount); + } + return error; +} + /* * Compute aligned version of the found extent. * Takes alignment and min length into account. diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index d256b51f913..4d44f03858b 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -41,50 +41,6 @@ #include "xfs_error.h" -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_alloc_get_rec( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t *bno, /* output: starting block of extent */ - xfs_extlen_t *len, /* output: length of extent */ - int *stat) /* output: success/failure */ -{ - xfs_alloc_block_t *block; /* btree block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - int ptr; /* record number */ - - ptr = cur->bc_ptrs[0]; - block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) - return error; -#endif - /* - * Off the right end or left end, return failure. - */ - if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) { - *stat = 0; - return 0; - } - /* - * Point to the record and extract its data. - */ - { - xfs_alloc_rec_t *rec; /* record data */ - - rec = XFS_ALLOC_REC_ADDR(block, ptr, cur); - *bno = be32_to_cpu(rec->ar_startblock); - *len = be32_to_cpu(rec->ar_blockcount); - } - *stat = 1; - return 0; -} - - STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( struct xfs_btree_cur *cur) diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 8d2e3ec21fd..22f1d709af7 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -94,12 +94,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) -/* - * Get the data from the pointed-to record. - */ -extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, - xfs_extlen_t *len, int *stat); - extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 28cc7681834..8503ed5d10a 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -3764,3 +3764,44 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_btree_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_rec **recp, /* output: btree record */ + int *stat) /* output: success/failure */ +{ + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer pointer */ + int ptr; /* record number */ +#ifdef DEBUG + int error; /* error return value */ +#endif + + ptr = cur->bc_ptrs[0]; + block = xfs_btree_get_block(cur, 0, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, 0, bp); + if (error) + return error; +#endif + + /* + * Off the right end or left end, return failure. + */ + if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { + *stat = 0; + return 0; + } + + /* + * Point to the record and extract its data. + */ + *recp = xfs_btree_rec_addr(cur, ptr, block); + *stat = 1; + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 06ef792e0aa..cee3684d871 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -567,6 +567,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); +int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index f13f59b13cc..c8a56c52964 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -191,6 +191,29 @@ xfs_inobt_update( return xfs_btree_update(cur, &rec); } +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_inobt_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t *ino, /* output: starting inode of chunk */ + __int32_t *fcnt, /* output: number of free inodes */ + xfs_inofree_t *free, /* output: free inode mask */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + *ino = be32_to_cpu(rec->inobt.ir_startino); + *fcnt = be32_to_cpu(rec->inobt.ir_freecount); + *free = be64_to_cpu(rec->inobt.ir_free); + } + return error; +} + /* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 4026578bc26..c5745f6d94e 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -168,6 +168,11 @@ int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, __int32_t fcnt, xfs_inofree_t free, int *stat); +/* + * Get the data from the pointed-to record. + */ +extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, + __int32_t *fcnt, xfs_inofree_t *free, int *stat); #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 6c0a07d1fed..9f4e33c945c 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -41,50 +41,6 @@ #include "xfs_error.h" -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_inobt_get_rec( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t *ino, /* output: starting inode of chunk */ - __int32_t *fcnt, /* output: number of free inodes */ - xfs_inofree_t *free, /* output: free inode mask */ - int *stat) /* output: success/failure */ -{ - xfs_inobt_block_t *block; /* btree block */ - xfs_buf_t *bp; /* buffer containing btree block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - int ptr; /* record number */ - xfs_inobt_rec_t *rec; /* record data */ - - bp = cur->bc_bufs[0]; - ptr = cur->bc_ptrs[0]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, bp))) - return error; -#endif - /* - * Off the right end or left end, return failure. - */ - if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) { - *stat = 0; - return 0; - } - /* - * Point to the record and extract its data. - */ - rec = XFS_INOBT_REC_ADDR(block, ptr, cur); - *ino = be32_to_cpu(rec->ir_startino); - *fcnt = be32_to_cpu(rec->ir_freecount); - *free = be64_to_cpu(rec->ir_free); - *stat = 1; - return 0; -} - STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 3eff3b6e5fa..ff7406b4bac 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -116,13 +116,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \ i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) -/* - * Get the data from the pointed-to record. - */ -extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, - __int32_t *fcnt, xfs_inofree_t *free, int *stat); - - extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); -- cgit v1.2.3 From fd6bcc5b63051392ba709a8fd33173b263669e0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:21 +1100 Subject: [XFS] kill xfs_bmbt_log_block and xfs_bmbt_log_recs These are equivalent to the xfs_btree_* versions, and the only remaining caller can be switched to the generic one after they are exported. Also remove some now dead infrastructure in xfs_bmap_btree.c. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32207a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_bmap.c | 4 +-- fs/xfs/xfs_bmap_btree.c | 88 ------------------------------------------------- fs/xfs/xfs_bmap_btree.h | 4 --- fs/xfs/xfs_btree.c | 4 +-- fs/xfs/xfs_btree.h | 6 ++++ 5 files changed, 10 insertions(+), 96 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 74761ca2c63..5cceb8d3c16 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3563,8 +3563,8 @@ xfs_bmap_extents_to_btree( * Do all this logging at the end so that * the root is at the right level. */ - xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS); - xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); + xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); + xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); ASSERT(*curp == NULL); *curp = cur; *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 5b8030561d7..7a02d391afe 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -44,33 +44,6 @@ #include "xfs_error.h" #include "xfs_quota.h" -#undef EXIT - -#define ENTRY XBT_ENTRY -#define ERROR XBT_ERROR -#define EXIT XBT_EXIT - -/* - * Keep the XFS_BMBT_TRACE_ names around for now until all code using them - * is converted to be generic and thus switches to the XFS_BTREE_TRACE_ names. - */ -#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ - XFS_BTREE_TRACE_ARGBI(c,b,i) -#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ - XFS_BTREE_TRACE_ARGBII(c,b,i,j) -#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ - XFS_BTREE_TRACE_ARGFFFI(c,o,b,i,j) -#define XFS_BMBT_TRACE_ARGI(c,i) \ - XFS_BTREE_TRACE_ARGI(c,i) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ - XFS_BTREE_TRACE_ARGIPK(c,i,(union xfs_btree_ptr)f,s) -#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ - XFS_BTREE_TRACE_ARGIPR(c,i, \ - (union xfs_btree_ptr)f, (union xfs_btree_rec *)r) -#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ - XFS_BTREE_TRACE_ARGIK(c,i,(union xfs_btree_key *)k) -#define XFS_BMBT_TRACE_CURSOR(c,s) \ - XFS_BTREE_TRACE_CURSOR(c,s) /* * Determine the extent state. @@ -259,67 +232,6 @@ xfs_bmbt_disk_get_startoff( XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } -/* - * Log fields from the btree block header. - */ -void -xfs_bmbt_log_block( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int fields) -{ - int first; - int last; - xfs_trans_t *tp; - static const short offsets[] = { - offsetof(xfs_bmbt_block_t, bb_magic), - offsetof(xfs_bmbt_block_t, bb_level), - offsetof(xfs_bmbt_block_t, bb_numrecs), - offsetof(xfs_bmbt_block_t, bb_leftsib), - offsetof(xfs_bmbt_block_t, bb_rightsib), - sizeof(xfs_bmbt_block_t) - }; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBI(cur, bp, fields); - tp = cur->bc_tp; - if (bp) { - xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, - &last); - xfs_trans_log_buf(tp, bp, first, last); - } else - xfs_trans_log_inode(tp, cur->bc_private.b.ip, - XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} - -/* - * Log record values from the btree block. - */ -void -xfs_bmbt_log_recs( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int rfirst, - int rlast) -{ - xfs_bmbt_block_t *block; - int first; - int last; - xfs_bmbt_rec_t *rp; - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast); - ASSERT(bp); - tp = cur->bc_tp; - block = XFS_BUF_TO_BMBT_BLOCK(bp); - rp = XFS_BMAP_REC_DADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(tp, bp, first, last); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} /* * Set all the fields in a bmap extent record from the arguments. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 952ab395f79..6f38e250570 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -247,10 +247,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); -extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); -extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, - int); - extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 8503ed5d10a..88eb00bdeb9 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1309,7 +1309,7 @@ xfs_btree_log_keys( /* * Log record values from the btree block. */ -STATIC void +void xfs_btree_log_recs( struct xfs_btree_cur *cur, struct xfs_buf *bp, @@ -1357,7 +1357,7 @@ xfs_btree_log_ptrs( /* * Log fields from a btree block header. */ -STATIC void +void xfs_btree_log_block( struct xfs_btree_cur *cur, /* btree cursor */ struct xfs_buf *bp, /* buffer containing btree block */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index cee3684d871..d6cc71e31de 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -569,6 +569,12 @@ int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); +/* + * Internal btree helpers also used by xfs_bmap.c. + */ +void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); +void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); + /* * Helpers. */ -- cgit v1.2.3 From 4a26e66e7728112f0e1cd7eca3bcc430b3a221c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:32 +1100 Subject: [XFS] add keys_inorder and recs_inorder btree methods Add methods to check whether two keys/records are in the righ order. This replaces the xfs_btree_check_key and xfs_btree_check_rec methods. For the callers from xfs_bmap.c just opencode the bmbt-specific asserts. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32208a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_alloc_btree.c | 44 ++++++++++++++ fs/xfs/xfs_bmap.c | 11 +++- fs/xfs/xfs_bmap_btree.c | 28 +++++++++ fs/xfs/xfs_btree.c | 150 +++++----------------------------------------- fs/xfs/xfs_btree.h | 36 ++++------- fs/xfs/xfs_ialloc_btree.c | 27 +++++++++ 6 files changed, 135 insertions(+), 161 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 4d44f03858b..9e63f8c180d 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -311,6 +311,45 @@ xfs_allocbt_kill_root( return 0; } +#ifdef DEBUG +STATIC int +xfs_allocbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock); + } else { + return be32_to_cpu(k1->alloc.ar_blockcount) < + be32_to_cpu(k2->alloc.ar_blockcount) || + (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && + be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock)); + } +} + +STATIC int +xfs_allocbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return be32_to_cpu(r1->alloc.ar_startblock) + + be32_to_cpu(r1->alloc.ar_blockcount) <= + be32_to_cpu(r2->alloc.ar_startblock); + } else { + return be32_to_cpu(r1->alloc.ar_blockcount) < + be32_to_cpu(r2->alloc.ar_blockcount) || + (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && + be32_to_cpu(r1->alloc.ar_startblock) < + be32_to_cpu(r2->alloc.ar_startblock)); + } +} +#endif /* DEBUG */ + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -395,6 +434,11 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, +#ifdef DEBUG + .keys_inorder = xfs_allocbt_keys_inorder, + .recs_inorder = xfs_allocbt_recs_inorder, +#endif + #ifdef XFS_BTREE_TRACE .trace_enter = xfs_allocbt_trace_enter, .trace_cursor = xfs_allocbt_trace_cursor, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 5cceb8d3c16..b7f99d7576d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -6195,7 +6195,8 @@ xfs_check_block( } if (prevp) { - xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp); + ASSERT(be64_to_cpu(prevp->br_startoff) < + be64_to_cpu(keyp->br_startoff)); } prevp = keyp; @@ -6338,11 +6339,15 @@ xfs_bmap_check_leaf_extents( ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); if (i) { - xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); + ASSERT(xfs_bmbt_disk_get_startoff(&last) + + xfs_bmbt_disk_get_blockcount(&last) <= + xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); - xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); + ASSERT(xfs_bmbt_disk_get_startoff(ep) + + xfs_bmbt_disk_get_blockcount(ep) <= + xfs_bmbt_disk_get_startoff(nextp)); ep = nextp; } diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 7a02d391afe..c5eeb3241e2 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -699,6 +699,29 @@ xfs_bmbt_key_diff( cur->bc_rec.b.br_startoff; } +#ifdef DEBUG +STATIC int +xfs_bmbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be64_to_cpu(k1->bmbt.br_startoff) < + be64_to_cpu(k2->bmbt.br_startoff); +} + +STATIC int +xfs_bmbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return xfs_bmbt_disk_get_startoff(&r1->bmbt) + + xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= + xfs_bmbt_disk_get_startoff(&r2->bmbt); +} +#endif /* DEBUG */ + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_bmbt_trace_buf; @@ -801,6 +824,11 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, +#ifdef DEBUG + .keys_inorder = xfs_bmbt_keys_inorder, + .recs_inorder = xfs_bmbt_recs_inorder, +#endif + #ifdef XFS_BTREE_TRACE .trace_enter = xfs_bmbt_trace_enter, .trace_cursor = xfs_bmbt_trace_cursor, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 88eb00bdeb9..d667d30210a 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -52,122 +52,6 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }; -/* - * External routines. - */ - -#ifdef DEBUG -/* - * Debug routine: check that keys are in the right order. - */ -void -xfs_btree_check_key( - xfs_btnum_t btnum, /* btree identifier */ - void *ak1, /* pointer to left (lower) key */ - void *ak2) /* pointer to right (higher) key */ -{ - switch (btnum) { - case XFS_BTNUM_BNO: { - xfs_alloc_key_t *k1; - xfs_alloc_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)); - break; - } - case XFS_BTNUM_CNT: { - xfs_alloc_key_t *k1; - xfs_alloc_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) || - (k1->ar_blockcount == k2->ar_blockcount && - be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock))); - break; - } - case XFS_BTNUM_BMAP: { - xfs_bmbt_key_t *k1; - xfs_bmbt_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff)); - break; - } - case XFS_BTNUM_INO: { - xfs_inobt_key_t *k1; - xfs_inobt_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino)); - break; - } - default: - ASSERT(0); - } -} - -/* - * Debug routine: check that records are in the right order. - */ -void -xfs_btree_check_rec( - xfs_btnum_t btnum, /* btree identifier */ - void *ar1, /* pointer to left (lower) record */ - void *ar2) /* pointer to right (higher) record */ -{ - switch (btnum) { - case XFS_BTNUM_BNO: { - xfs_alloc_rec_t *r1; - xfs_alloc_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(be32_to_cpu(r1->ar_startblock) + - be32_to_cpu(r1->ar_blockcount) <= - be32_to_cpu(r2->ar_startblock)); - break; - } - case XFS_BTNUM_CNT: { - xfs_alloc_rec_t *r1; - xfs_alloc_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) || - (r1->ar_blockcount == r2->ar_blockcount && - be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock))); - break; - } - case XFS_BTNUM_BMAP: { - xfs_bmbt_rec_t *r1; - xfs_bmbt_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(xfs_bmbt_disk_get_startoff(r1) + - xfs_bmbt_disk_get_blockcount(r1) <= - xfs_bmbt_disk_get_startoff(r2)); - break; - } - case XFS_BTNUM_INO: { - xfs_inobt_rec_t *r1; - xfs_inobt_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <= - be32_to_cpu(r2->ir_startino)); - break; - } - default: - ASSERT(0); - } -} -#endif /* DEBUG */ int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( @@ -2032,9 +1916,8 @@ xfs_btree_lshift( xfs_btree_log_keys(cur, lbp, lrecs, lrecs); xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); - xfs_btree_check_key(cur->bc_btnum, - xfs_btree_key_addr(cur, lrecs - 1, left), - lkp); + ASSERT(cur->bc_ops->keys_inorder(cur, + xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); } else { /* It's a leaf. Move records. */ union xfs_btree_rec *lrp; /* left record pointer */ @@ -2045,9 +1928,8 @@ xfs_btree_lshift( xfs_btree_copy_recs(cur, lrp, rrp, 1); xfs_btree_log_recs(cur, lbp, lrecs, lrecs); - xfs_btree_check_rec(cur->bc_btnum, - xfs_btree_rec_addr(cur, lrecs - 1, left), - lrp); + ASSERT(cur->bc_ops->recs_inorder(cur, + xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); } xfs_btree_set_numrecs(left, lrecs); @@ -2222,8 +2104,8 @@ xfs_btree_rshift( xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); - xfs_btree_check_key(cur->bc_btnum, rkp, - xfs_btree_key_addr(cur, 2, right)); + ASSERT(cur->bc_ops->keys_inorder(cur, rkp, + xfs_btree_key_addr(cur, 2, right))); } else { /* It's a leaf. make a hole in the records */ union xfs_btree_rec *lrp; @@ -2241,8 +2123,8 @@ xfs_btree_rshift( cur->bc_ops->init_key_from_rec(&key, rrp); rkp = &key; - xfs_btree_check_rec(cur->bc_btnum, rrp, - xfs_btree_rec_addr(cur, 2, right)); + ASSERT(cur->bc_ops->recs_inorder(cur, rrp, + xfs_btree_rec_addr(cur, 2, right))); } /* @@ -2849,11 +2731,11 @@ xfs_btree_insrec( /* Check that the new entry is being inserted in the right place. */ if (ptr <= numrecs) { if (level == 0) { - xfs_btree_check_rec(cur->bc_btnum, recp, - xfs_btree_rec_addr(cur, ptr, block)); + ASSERT(cur->bc_ops->recs_inorder(cur, recp, + xfs_btree_rec_addr(cur, ptr, block))); } else { - xfs_btree_check_key(cur->bc_btnum, &key, - xfs_btree_key_addr(cur, ptr, block)); + ASSERT(cur->bc_ops->keys_inorder(cur, &key, + xfs_btree_key_addr(cur, ptr, block))); } } #endif @@ -2923,8 +2805,8 @@ xfs_btree_insrec( xfs_btree_log_keys(cur, bp, ptr, numrecs); #ifdef DEBUG if (ptr < numrecs) { - xfs_btree_check_key(cur->bc_btnum, kp, - xfs_btree_key_addr(cur, ptr + 1, block)); + ASSERT(cur->bc_ops->keys_inorder(cur, kp, + xfs_btree_key_addr(cur, ptr + 1, block))); } #endif } else { @@ -2941,8 +2823,8 @@ xfs_btree_insrec( xfs_btree_log_recs(cur, bp, ptr, numrecs); #ifdef DEBUG if (ptr < numrecs) { - xfs_btree_check_rec(cur->bc_btnum, rp, - xfs_btree_rec_addr(cur, ptr + 1, block)); + ASSERT(cur->bc_ops->recs_inorder(cur, rp, + xfs_btree_rec_addr(cur, ptr + 1, block))); } #endif } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index d6cc71e31de..25a488d4da1 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -229,6 +229,18 @@ struct xfs_btree_ops { __int64_t (*key_diff)(struct xfs_btree_cur *cur, union xfs_btree_key *key); +#ifdef DEBUG + /* check that k1 is lower than k2 */ + int (*keys_inorder)(struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2); + + /* check that r1 is lower than r2 */ + int (*recs_inorder)(struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2); +#endif + /* btree tracing */ #ifdef XFS_BTREE_TRACE void (*trace_enter)(struct xfs_btree_cur *, const char *, @@ -379,30 +391,6 @@ xfs_btree_check_ptr( int index, /* offset from ptr to check */ int level); /* btree block level */ -#ifdef DEBUG - -/* - * Debug routine: check that keys are in the right order. - */ -void -xfs_btree_check_key( - xfs_btnum_t btnum, /* btree identifier */ - void *ak1, /* pointer to left (lower) key */ - void *ak2); /* pointer to right (higher) key */ - -/* - * Debug routine: check that records are in the right order. - */ -void -xfs_btree_check_rec( - xfs_btnum_t btnum, /* btree identifier */ - void *ar1, /* pointer to left (lower) record */ - void *ar2); /* pointer to right (higher) record */ -#else -#define xfs_btree_check_key(a, b, c) -#define xfs_btree_check_rec(a, b, c) -#endif /* DEBUG */ - /* * Delete the btree cursor. */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 9f4e33c945c..dcd4a956e73 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -219,6 +219,28 @@ xfs_inobt_kill_root( return 0; } +#ifdef DEBUG +STATIC int +xfs_inobt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be32_to_cpu(k1->inobt.ir_startino) < + be32_to_cpu(k2->inobt.ir_startino); +} + +STATIC int +xfs_inobt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= + be32_to_cpu(r2->inobt.ir_startino); +} +#endif /* DEBUG */ + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -302,6 +324,11 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, +#ifdef DEBUG + .keys_inorder = xfs_inobt_keys_inorder, + .recs_inorder = xfs_inobt_recs_inorder, +#endif + #ifdef XFS_BTREE_TRACE .trace_enter = xfs_inobt_trace_enter, .trace_cursor = xfs_inobt_trace_cursor, -- cgit v1.2.3 From 3cc7524c8445e6244b055f3fa338529188c7c260 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:41 +1100 Subject: [XFS] mark various functions in xfs_btree.c static Lots of functionality in xfs_btree.c isn't needed by callers outside of this file anymore, so mark these functions static. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32209a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_btree.c | 22 +++++++++--------- fs/xfs/xfs_btree.h | 67 ------------------------------------------------------ 2 files changed, 11 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index d667d30210a..b735c7299a8 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -87,7 +87,7 @@ xfs_btree_check_lblock( return 0; } -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sblock( struct xfs_btree_cur *cur, /* btree cursor */ struct xfs_btree_sblock *block, /* btree short form block pointer */ @@ -163,7 +163,7 @@ xfs_btree_check_lptr( /* * Check that (short) pointer is ok. */ -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sptr( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* btree block disk address */ @@ -182,7 +182,7 @@ xfs_btree_check_sptr( /* * Check that block ptr is ok. */ -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_ptr( struct xfs_btree_cur *cur, /* btree cursor */ union xfs_btree_ptr *ptr, /* btree block disk address */ @@ -523,7 +523,7 @@ xfs_btree_islastblock( * Change the cursor to point to the first record at the given level. * Other levels are unaffected. */ -int /* success=1, failure=0 */ +STATIC int /* success=1, failure=0 */ xfs_btree_firstrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ @@ -552,7 +552,7 @@ xfs_btree_firstrec( * Change the cursor to point to the last record in the current block * at the given level. Other levels are unaffected. */ -int /* success=1, failure=0 */ +STATIC int /* success=1, failure=0 */ xfs_btree_lastrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ @@ -775,7 +775,7 @@ xfs_btree_readahead_sblock( * Read-ahead btree blocks, at the given level. * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. */ -int +STATIC int xfs_btree_readahead( struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ @@ -1711,7 +1711,7 @@ error0: /* * Update keys at all levels from here to the root along the cursor's path. */ -int +STATIC int xfs_btree_updkey( struct xfs_btree_cur *cur, union xfs_btree_key *keyp, @@ -1821,7 +1821,7 @@ error0: * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. */ -int /* error */ +STATIC int /* error */ xfs_btree_lshift( struct xfs_btree_cur *cur, int level, @@ -2004,7 +2004,7 @@ error0: * Move 1 record right from cur/level if possible. * Update cur to reflect the new path. */ -int /* error */ +STATIC int /* error */ xfs_btree_rshift( struct xfs_btree_cur *cur, int level, @@ -2180,7 +2180,7 @@ error1: * Return new block number and the key to its first * record (to be inserted into parent). */ -int /* error */ +STATIC int /* error */ xfs_btree_split( struct xfs_btree_cur *cur, int level, @@ -2465,7 +2465,7 @@ error0: /* * Allocate a new root block, fill it in. */ -int /* error */ +STATIC int /* error */ xfs_btree_new_root( struct xfs_btree_cur *cur, /* btree cursor */ int *stat) /* success/failure */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 25a488d4da1..2a22c587f1a 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -339,16 +339,6 @@ xfs_btree_check_lblock( int level, /* level of the btree block */ struct xfs_buf *bp); /* buffer containing block, if any */ -/* - * Check that short form block header is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_sblock *block, /* btree short form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp); /* buffer containing block */ - /* * Check that block header is ok. */ @@ -368,29 +358,6 @@ xfs_btree_check_lptr( xfs_dfsbno_t ptr, /* btree block disk address */ int level); /* btree block level */ -#define xfs_btree_check_lptr_disk(cur, ptr, level) \ - xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level) - - -/* - * Check that (short) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t ptr, /* btree block disk address */ - int level); /* btree block level */ - -/* - * Check that (short) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_ptr( - struct xfs_btree_cur *cur, /* btree cursor */ - union xfs_btree_ptr *ptr, /* btree block disk address */ - int index, /* offset from ptr to check */ - int level); /* btree block level */ - /* * Delete the btree cursor. */ @@ -408,15 +375,6 @@ xfs_btree_dup_cursor( xfs_btree_cur_t *cur, /* input cursor */ xfs_btree_cur_t **ncur);/* output cursor */ -/* - * Change the cursor to point to the first record in the current block - * at the given level. Other levels are unaffected. - */ -int /* success=1, failure=0 */ -xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level); /* level to change */ - /* * Get a buffer for the block, return it with no data read. * Long-form addressing. @@ -448,15 +406,6 @@ xfs_btree_islastblock( xfs_btree_cur_t *cur, /* btree cursor */ int level); /* level to check */ -/* - * Change the cursor to point to the last record in the current block - * at the given level. Other levels are unaffected. - */ -int /* success=1, failure=0 */ -xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level); /* level to change */ - /* * Compute first and last byte offsets for the fields given. * Interprets the offsets table, which contains struct field offsets. @@ -517,16 +466,6 @@ xfs_btree_reada_bufs( xfs_agblock_t agbno, /* allocation group block number */ xfs_extlen_t count); /* count of filesystem blocks */ -/* - * Read-ahead btree blocks, at the given level. - * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. - */ -int /* readahead block count */ -xfs_btree_readahead( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - int lr); /* left/right bits */ - /* * Set the buffer for level "lev" in the cursor to bp, releasing * any previous buffer. @@ -544,13 +483,7 @@ xfs_btree_setbuf( int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); -int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); -int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); -int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); -int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, - union xfs_btree_key *, struct xfs_btree_cur **, int *); -int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); -- cgit v1.2.3 From 7f7c39ccb6045cf1fd5e7684a484c445291b44d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:50 +1100 Subject: [XFS] make btree tracing generic Make the existing bmap btree tracing generic so that it applies to all btree types. Some fragments lifted from a patch by Dave Chinner. This adds two files that were missed from the previous btree tracing checkin. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32210a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner --- fs/xfs/xfs_btree_trace.c | 249 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_btree_trace.h | 116 ++++++++++++++++++++++ 2 files changed, 365 insertions(+) create mode 100644 fs/xfs/xfs_btree_trace.c create mode 100644 fs/xfs/xfs_btree_trace.h (limited to 'fs') diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c new file mode 100644 index 00000000000..44ff942a0fd --- /dev/null +++ b/fs/xfs/xfs_btree_trace.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_btree_trace.h" + +STATIC void +xfs_btree_trace_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr ptr, + __psunsigned_t *high, + __psunsigned_t *low) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + __u64 val = be64_to_cpu(ptr.l); + *high = val >> 32; + *low = (int)val; + } else { + *high = 0; + *low = be32_to_cpu(ptr.s); + } +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. + */ +void +xfs_btree_trace_argbi( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI, + line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 2 integer args. + */ +void +xfs_btree_trace_argbii( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i0, + int i1, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII, + line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for 3 block-length args + * and an integer arg. + */ +void +xfs_btree_trace_argfffi( + const char *func, + struct xfs_btree_cur *cur, + xfs_dfiloff_t o, + xfs_dfsbno_t b, + xfs_dfilblks_t i, + int j, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI, + line, + o >> 32, (int)o, + b >> 32, (int)b, + i >> 32, (int)i, + (int)j, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for one integer arg. + */ +void +xfs_btree_trace_argi( + const char *func, + struct xfs_btree_cur *cur, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI, + line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, key. + */ +void +xfs_btree_trace_argipk( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_key *key, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK, + line, i, high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, rec. + */ +void +xfs_btree_trace_argipr( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_rec *rec, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1, l2; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR, + line, i, + high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, key. + */ +void +xfs_btree_trace_argik( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_key *key, + int line) +{ + __uint64_t l0, l1; + + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK, + line, i, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for record. + */ +void +xfs_btree_trace_argr( + const char *func, + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int line) +{ + __uint64_t l0, l1, l2; + + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR, + line, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for the cursor/operation. + */ +void +xfs_btree_trace_cursor( + const char *func, + struct xfs_btree_cur *cur, + int type, + int line) +{ + __uint32_t s0; + __uint64_t l0, l1; + char *s; + + switch (type) { + case XBT_ARGS: + s = "args"; + break; + case XBT_ENTRY: + s = "entry"; + break; + case XBT_ERROR: + s = "error"; + break; + case XBT_EXIT: + s = "exit"; + break; + default: + s = "unknown"; + break; + } + + cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line, + s0, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + (__psunsigned_t)cur->bc_bufs[0], + (__psunsigned_t)cur->bc_bufs[1], + (__psunsigned_t)cur->bc_bufs[2], + (__psunsigned_t)cur->bc_bufs[3], + (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], + (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); +} diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h new file mode 100644 index 00000000000..b3f5eb3c3c6 --- /dev/null +++ b/fs/xfs/xfs_btree_trace.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_BTREE_TRACE_H__ +#define __XFS_BTREE_TRACE_H__ + +struct xfs_btree_cur; +struct xfs_buf; + + +/* + * Trace hooks. + * i,j = integer (32 bit) + * b = btree block buffer (xfs_buf_t) + * p = btree ptr + * r = btree record + * k = btree key + */ + +#ifdef XFS_BTREE_TRACE + +/* + * Trace buffer entry types. + */ +#define XFS_BTREE_KTRACE_ARGBI 1 +#define XFS_BTREE_KTRACE_ARGBII 2 +#define XFS_BTREE_KTRACE_ARGFFFI 3 +#define XFS_BTREE_KTRACE_ARGI 4 +#define XFS_BTREE_KTRACE_ARGIPK 5 +#define XFS_BTREE_KTRACE_ARGIPR 6 +#define XFS_BTREE_KTRACE_ARGIK 7 +#define XFS_BTREE_KTRACE_ARGR 8 +#define XFS_BTREE_KTRACE_CUR 9 + +/* + * Sub-types for cursor traces. + */ +#define XBT_ARGS 0 +#define XBT_ENTRY 1 +#define XBT_ERROR 2 +#define XBT_EXIT 3 + +void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int); +void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int, int); +void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *, + xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int); +void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); +void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_key *, int); +void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_rec *, int); +void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int, + union xfs_btree_key *, int); +void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, + union xfs_btree_rec *, int); +void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); + + +#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */ +extern ktrace_t *xfs_allocbt_trace_buf; + +#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */ +extern ktrace_t *xfs_inobt_trace_buf; + +#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */ +#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */ +extern ktrace_t *xfs_bmbt_trace_buf; + + +#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ + xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ + xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) +#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \ + xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__) +#define XFS_BTREE_TRACE_ARGI(c, i) \ + xfs_btree_trace_argi(__func__, c, i, __LINE__) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ + xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \ + xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) \ + xfs_btree_trace_argik(__func__, c, i, k, __LINE__) +#define XFS_BTREE_TRACE_ARGR(c, r) \ + xfs_btree_trace_argr(__func__, c, r, __LINE__) +#define XFS_BTREE_TRACE_CURSOR(c, t) \ + xfs_btree_trace_cursor(__func__, c, t, __LINE__) +#else +#define XFS_BTREE_TRACE_ARGBI(c, b, i) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) +#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) +#define XFS_BTREE_TRACE_ARGI(c, i) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) +#define XFS_BTREE_TRACE_ARGR(c, r) +#define XFS_BTREE_TRACE_CURSOR(c, t) +#endif /* XFS_BTREE_TRACE */ + +#endif /* __XFS_BTREE_TRACE_H__ */ -- cgit v1.2.3 From d112f2984592acb774187b3adddc107fb0825500 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:59:06 +1100 Subject: [XFS] Wait for all I/O on truncate to zero file size It's possible to have outstanding xfs_ioend_t's queued when the file size is zero. This can happen in the direct I/O path when a direct I/O write fails due to ENOSPC. In this case the xfs_ioend_t will still be queued (ie xfs_end_io_direct() does not know that the I/O failed so can't force the xfs_ioend_t to be flushed synchronously). When we truncate a file on unlink we don't know to wait for these xfs_ioend_ts and we can have a use-after-free situation if the inode is reclaimed before the xfs_ioend_t is finally processed. As was suggested by Dave Chinner lets wait for all I/Os to complete when truncating the file size to zero. SGI-PV: 981668 SGI-Modid: xfs-linux-melb:xfs-kern:32216a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cc0474ddd2d..2b1294b8ad7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1450,7 +1450,7 @@ xfs_itruncate_start( mp = ip->i_mount; /* wait for the completion of any pending DIOs */ - if (new_size < ip->i_size) + if (new_size == 0 || new_size < ip->i_size) vn_iowait(ip); /* -- cgit v1.2.3 From bc3048e3cd3806ccfd5b16b1a30e5d6013abbd3d Mon Sep 17 00:00:00 2001 From: Peter Leckie Date: Thu, 30 Oct 2008 17:05:04 +1100 Subject: [XFS] Clean up dquot pincount code. This is a code cleanup and optimization that removes a per mount point spinlock from the quota code and cleans up the code. The patch changes the pincount from being an int protected by a spinlock to an atomic_t allowing the pincount to be manipulated without holding the spinlock. This cleanup also protects against random wakup's of both the aild and xfssyncd by reevaluating the pincount after been woken. Two latter patches will address the Spurious wakeups. SGI-PV: 986789 SGI-Modid: xfs-linux-melb:xfs-kern:32215a Signed-off-by: Peter Leckie Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Donald Douwsma Signed-off-by: Lachlan McIlroy --- fs/xfs/quota/xfs_dquot.c | 6 +++--- fs/xfs/quota/xfs_dquot.h | 4 ++-- fs/xfs/quota/xfs_dquot_item.c | 37 +++++++++++-------------------------- fs/xfs/quota/xfs_qm.c | 2 -- fs/xfs/quota/xfs_qm.h | 1 - 5 files changed, 16 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index f2705f2fd43..d3f4fbbe248 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -101,7 +101,7 @@ xfs_qm_dqinit( if (brandnewdquot) { dqp->dq_flnext = dqp->dq_flprev = dqp; mutex_init(&dqp->q_qlock); - sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); + init_waitqueue_head(&dqp->q_pinwait); /* * Because we want to use a counting completion, complete @@ -131,7 +131,7 @@ xfs_qm_dqinit( dqp->q_res_bcount = 0; dqp->q_res_icount = 0; dqp->q_res_rtbcount = 0; - dqp->q_pincount = 0; + atomic_set(&dqp->q_pincount, 0); dqp->q_hash = NULL; ASSERT(dqp->dq_flnext == dqp->dq_flprev); @@ -1489,7 +1489,7 @@ xfs_qm_dqpurge( "xfs_qm_dqpurge: dquot %p flush failed", dqp); xfs_dqflock(dqp); } - ASSERT(dqp->q_pincount == 0); + ASSERT(atomic_read(&dqp->q_pincount) == 0); ASSERT(XFS_FORCED_SHUTDOWN(mp) || !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 8958d0faf8d..7e455337e2b 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -83,8 +83,8 @@ typedef struct xfs_dquot { xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ mutex_t q_qlock; /* quota lock */ struct completion q_flush; /* flush completion queue */ - uint q_pincount; /* pin count for this dquot */ - sv_t q_pinwait; /* sync var for pinning */ + atomic_t q_pincount; /* dquot pin count */ + wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ #ifdef XFS_DQUOT_TRACE struct ktrace *q_trace; /* trace header structure */ #endif diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index f028644caa5..e33f8646418 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format( /* * Increment the pin count of the given dquot. - * This value is protected by pinlock spinlock in the xQM structure. */ STATIC void xfs_qm_dquot_logitem_pin( xfs_dq_logitem_t *logitem) { - xfs_dquot_t *dqp; + xfs_dquot_t *dqp = logitem->qli_dquot; - dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - dqp->q_pincount++; - spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + atomic_inc(dqp->q_pincount); } /* * Decrement the pin count of the given dquot, and wake up * anyone in xfs_dqwait_unpin() if the count goes to 0. The - * dquot must have been previously pinned with a call to xfs_dqpin(). + * dquot must have been previously pinned with a call to + * xfs_qm_dquot_logitem_pin(). */ /* ARGSUSED */ STATIC void @@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin( xfs_dq_logitem_t *logitem, int stale) { - xfs_dquot_t *dqp; + xfs_dquot_t *dqp = logitem->qli_dquot; - dqp = logitem->qli_dquot; - ASSERT(dqp->q_pincount > 0); - spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - dqp->q_pincount--; - if (dqp->q_pincount == 0) { - sv_broadcast(&dqp->q_pinwait); - } - spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + ASSERT(atomic_read(&dqp->q_pincount) > 0); + if (atomic_dec_and_test(&dqp->q_pincount)) + wake_up(&dqp->q_pinwait); } /* ARGSUSED */ @@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait( xfs_dquot_t *dqp) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); - if (dqp->q_pincount == 0) { + if (atomic_read(&dqp->q_pincount) == 0) return; - } /* * Give the log a push so we don't wait here too long. */ xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); - spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - if (dqp->q_pincount == 0) { - spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - return; - } - sv_wait(&(dqp->q_pinwait), PINOD, - &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s); + wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } /* @@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock( uint retval; dqp = qip->qli_dquot; - if (dqp->q_pincount > 0) + if (atomic_read(&dqp->q_pincount) > 0) return (XFS_ITEM_PINNED); if (! xfs_qm_dqlock_nowait(dqp)) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index df0ffef9775..270f775974e 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1137,7 +1137,6 @@ xfs_qm_init_quotainfo( return error; } - spin_lock_init(&qinf->qi_pinlock); xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); qinf->qi_dqreclaims = 0; @@ -1234,7 +1233,6 @@ xfs_qm_destroy_quotainfo( */ xfs_qm_rele_quotafs_ref(mp); - spinlock_destroy(&qi->qi_pinlock); xfs_qm_list_destroy(&qi->qi_dqlist); if (qi->qi_uquotaip) { diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 44f25349e47..4f2de977172 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -106,7 +106,6 @@ typedef struct xfs_qm { typedef struct xfs_quotainfo { xfs_inode_t *qi_uquotaip; /* user quota inode */ xfs_inode_t *qi_gquotaip; /* group quota inode */ - spinlock_t qi_pinlock; /* dquot pinning lock */ xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ int qi_dqreclaims; /* a change here indicates a removal in the dqlist */ -- cgit v1.2.3 From d1de802155341ab63e64d37c58c61d6f358bb3ad Mon Sep 17 00:00:00 2001 From: Peter Leckie Date: Thu, 30 Oct 2008 17:05:18 +1100 Subject: [XFS] Fix build brakage from patch "Clean up dquot pincount code" This is a fix for patch " Clean up dquot pincount code" which introduced a build breakage due to a missing & in xfs_qm_dquot_logitem_pin. SGI-PV: 986789 SGI-Modid: xfs-linux-melb:xfs-kern:32221a Signed-off-by: Peter Leckie Signed-off-by: Donald Douwsma Signed-off-by: Lachlan McIlroy --- fs/xfs/quota/xfs_dquot_item.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index e33f8646418..48f08109621 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -96,7 +96,7 @@ xfs_qm_dquot_logitem_pin( xfs_dquot_t *dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - atomic_inc(dqp->q_pincount); + atomic_inc(&dqp->q_pincount); } /* -- cgit v1.2.3 From 24ee0e49c9cce23acb1758728cb09e8d2b53bd33 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 17:05:26 +1100 Subject: [XFS] Make xfs_btree_check_ptr() debug-only code. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32224a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_btree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index b735c7299a8..72a26bb7643 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -160,6 +160,7 @@ xfs_btree_check_lptr( return 0; } +#ifdef DEBUG /* * Check that (short) pointer is ok. */ @@ -197,6 +198,7 @@ xfs_btree_check_ptr( be32_to_cpu((&ptr->s)[index]), level); } } +#endif /* * Delete the btree cursor. -- cgit v1.2.3 From 847fff5ca881670ca8ec617afeb943950f0c804b Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 17:05:38 +1100 Subject: [XFS] Sync up kernel and user-space headers SGI-PV: 986558 SGI-Modid: xfs-linux-melb:xfs-kern:32231a Signed-off-by: Barry Naujok Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_ag.h | 5 +- fs/xfs/xfs_alloc.h | 27 ++--- fs/xfs/xfs_arch.h | 39 ++++-- fs/xfs/xfs_bit.h | 3 +- fs/xfs/xfs_bmap.h | 61 +++++----- fs/xfs/xfs_bmap_btree.h | 3 - fs/xfs/xfs_btree.h | 4 - fs/xfs/xfs_da_btree.h | 4 +- fs/xfs/xfs_ialloc.h | 3 - fs/xfs/xfs_imap.h | 2 - fs/xfs/xfs_inode.h | 246 ++++++++++++++++++------------------- fs/xfs/xfs_inode_item.h | 41 +++---- fs/xfs/xfs_mount.h | 17 +-- fs/xfs/xfs_trans.h | 317 ++++++++++++++++++++++++------------------------ 14 files changed, 383 insertions(+), 389 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 61b292a9fb4..729ee3eb39a 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -192,15 +192,16 @@ typedef struct xfs_perag xfs_agino_t pagi_freecount; /* number of free inodes */ xfs_agino_t pagi_count; /* number of allocated inodes */ int pagb_count; /* pagb slots in use */ + xfs_perag_busy_t *pagb_list; /* unstable blocks */ #ifdef __KERNEL__ spinlock_t pagb_lock; /* lock for pagb_list */ -#endif - xfs_perag_busy_t *pagb_list; /* unstable blocks */ + atomic_t pagf_fstrms; /* # of filestreams active in this AG */ int pag_ici_init; /* incore inode cache initialised */ rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ +#endif } xfs_perag_t; #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 5aec15d0651..588172796f7 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf; #define XFS_ALLOC_KTRACE_BUSYSEARCH 6 #endif +void +xfs_alloc_mark_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len); + +void +xfs_alloc_clear_busy(xfs_trans_t *tp, + xfs_agnumber_t ag, + int idx); + +#endif /* __KERNEL__ */ + /* * Compute and fill in value of m_ag_maxlevels. */ @@ -196,18 +209,4 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len); /* length of extent */ -void -xfs_alloc_mark_busy(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len); - -void -xfs_alloc_clear_busy(xfs_trans_t *tp, - xfs_agnumber_t ag, - int idx); - - -#endif /* __KERNEL__ */ - #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index 0b3b5efe848..53d5e70d136 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -41,21 +41,36 @@ #endif #ifdef XFS_NATIVE_HOST -#define cpu_to_be16(val) ((__be16)(val)) -#define cpu_to_be32(val) ((__be32)(val)) -#define cpu_to_be64(val) ((__be64)(val)) -#define be16_to_cpu(val) ((__uint16_t)(val)) -#define be32_to_cpu(val) ((__uint32_t)(val)) -#define be64_to_cpu(val) ((__uint64_t)(val)) +#define cpu_to_be16(val) ((__force __be16)(__u16)(val)) +#define cpu_to_be32(val) ((__force __be32)(__u32)(val)) +#define cpu_to_be64(val) ((__force __be64)(__u64)(val)) +#define be16_to_cpu(val) ((__force __u16)(__be16)(val)) +#define be32_to_cpu(val) ((__force __u32)(__be32)(val)) +#define be64_to_cpu(val) ((__force __u64)(__be64)(val)) #else -#define cpu_to_be16(val) (__swab16((__uint16_t)(val))) -#define cpu_to_be32(val) (__swab32((__uint32_t)(val))) -#define cpu_to_be64(val) (__swab64((__uint64_t)(val))) -#define be16_to_cpu(val) (__swab16((__be16)(val))) -#define be32_to_cpu(val) (__swab32((__be32)(val))) -#define be64_to_cpu(val) (__swab64((__be64)(val))) +#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val))) +#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val))) +#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val))) +#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val))) +#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val))) +#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val))) #endif +static inline void be16_add_cpu(__be16 *a, __s16 b) +{ + *a = cpu_to_be16(be16_to_cpu(*a) + b); +} + +static inline void be32_add_cpu(__be32 *a, __s32 b) +{ + *a = cpu_to_be32(be32_to_cpu(*a) + b); +} + +static inline void be64_add_cpu(__be64 *a, __s64 b) +{ + *a = cpu_to_be64(be64_to_cpu(*a) + b); +} + #endif /* __KERNEL__ */ /* do we need conversion? */ diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 8e0e463dae2..bca7b243c31 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v) /* Get low bit set out of 32-bit argument, -1 if none set */ static inline int xfs_lowbit32(__uint32_t v) { - unsigned long t = v; - return (v) ? find_first_bit(&t, 32) : -1; + return ffs(v) - 1; } /* Get low bit set out of 64-bit argument, -1 if none set */ diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 9f3e3a836d1..7c9d12cd7a4 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -137,9 +137,7 @@ typedef struct xfs_bmalloca { char conv; /* overwriting unwritten extents */ } xfs_bmalloca_t; -#ifdef __KERNEL__ - -#if defined(XFS_BMAP_TRACE) +#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE) /* * Trace operations for bmap extent tracing */ @@ -163,9 +161,12 @@ xfs_bmap_trace_exlist( int whichfork); /* data or attr fork */ #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ xfs_bmap_trace_exlist(__func__,ip,c,w) -#else + +#else /* __KERNEL__ && XFS_BMAP_TRACE */ + #define XFS_BMAP_TRACE_EXLIST(ip,c,w) -#endif + +#endif /* __KERNEL__ && XFS_BMAP_TRACE */ /* * Convert inode from non-attributed to attributed. @@ -205,20 +206,6 @@ xfs_bmap_compute_maxlevels( struct xfs_mount *mp, /* file system mount structure */ int whichfork); /* data or attr fork */ -/* - * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi - * caller. Frees all the extents that need freeing, which must be done - * last due to locking considerations. - * - * Return 1 if the given transaction was committed and a new one allocated, - * and 0 otherwise. - */ -int /* error */ -xfs_bmap_finish( - struct xfs_trans **tp, /* transaction pointer addr */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *committed); /* xact committed or not */ - /* * Returns the file-relative block number of the first unused block in the file. * This is the lowest-address hole if the file has holes, else the first block @@ -343,6 +330,32 @@ xfs_bunmapi( extents */ int *done); /* set if not done yet */ +/* + * Check an extent list, which has just been read, for + * any bit in the extent flag field. + */ +int +xfs_check_nostate_extents( + struct xfs_ifork *ifp, + xfs_extnum_t idx, + xfs_extnum_t num); + +#ifdef __KERNEL__ + +/* + * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi + * caller. Frees all the extents that need freeing, which must be done + * last due to locking considerations. + * + * Return 1 if the given transaction was committed and a new one allocated, + * and 0 otherwise. + */ +int /* error */ +xfs_bmap_finish( + struct xfs_trans **tp, /* transaction pointer addr */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + int *committed); /* xact committed or not */ + /* * Fcntl interface to xfs_bmapi. */ @@ -374,16 +387,6 @@ xfs_bmap_count_blocks( int whichfork, int *count); -/* - * Check an extent list, which has just been read, for - * any bit in the extent flag field. - */ -int -xfs_check_nostate_extents( - struct xfs_ifork *ifp, - xfs_extnum_t idx, - xfs_extnum_t num); - /* * Search the extent records for the entry containing block bno. * If bno lies in a hole, point to the next entry. If bno lies diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6f38e250570..5669242b52d 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -231,8 +231,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0]) -#ifdef __KERNEL__ - /* * Prototypes for xfs_bmap.c to call. */ @@ -264,6 +262,5 @@ extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); -#endif /* __KERNEL__ */ #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 2a22c587f1a..7425b2b4a25 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -327,8 +327,6 @@ typedef struct xfs_btree_cur #define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp)) -#ifdef __KERNEL__ - /* * Check that long form block header is ok. */ @@ -515,8 +513,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) return be16_to_cpu(block->bb_level); } -#endif /* __KERNEL__ */ - /* * Min and max functions for extlen, agblock, fileoff, and filblks types. diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 599e270e695..70b710c1792 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -206,9 +206,8 @@ struct xfs_nameops { }; -#ifdef __KERNEL__ /*======================================================================== - * Function prototypes for the kernel. + * Function prototypes. *========================================================================*/ /* @@ -269,6 +268,5 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); extern struct kmem_zone *xfs_da_state_zone; extern struct kmem_zone *xfs_dabuf_zone; -#endif /* __KERNEL__ */ #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c5745f6d94e..ccf554a6e0a 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -56,7 +56,6 @@ static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) } -#ifdef __KERNEL__ /* * Allocate an inode on disk. * Mode is used to tell whether the new inode will need space, and whether @@ -174,6 +173,4 @@ int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, __int32_t *fcnt, xfs_inofree_t *free, int *stat); -#endif /* __KERNEL__ */ - #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h index d3645000398..f9ce62890ea 100644 --- a/fs/xfs/xfs_imap.h +++ b/fs/xfs/xfs_imap.h @@ -30,11 +30,9 @@ typedef struct xfs_imap { ushort im_boffset; /* inode offset in block in bytes */ } xfs_imap_t; -#ifdef __KERNEL__ struct xfs_mount; struct xfs_trans; int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_imap_t *, uint); -#endif #endif /* __XFS_IMAP_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 2a69a7dee22..a8f1e6833aa 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -20,7 +20,7 @@ struct xfs_dinode; struct xfs_dinode_core; - +struct xfs_inode; /* * Fork identifiers. @@ -83,54 +83,6 @@ typedef struct xfs_ifork { } if_u2; } xfs_ifork_t; -/* - * Flags for xfs_ichgtime(). - */ -#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ - -/* - * Per-fork incore inode flags. - */ -#define XFS_IFINLINE 0x01 /* Inline data is read in */ -#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ -#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ -#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ - -/* - * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). - */ -#define XFS_IMAP_LOOKUP 0x1 -#define XFS_IMAP_BULKSTAT 0x2 - -#ifdef __KERNEL__ -struct bhv_desc; -struct cred; -struct ktrace; -struct xfs_buf; -struct xfs_bmap_free; -struct xfs_bmbt_irec; -struct xfs_bmbt_block; -struct xfs_inode; -struct xfs_inode_log_item; -struct xfs_mount; -struct xfs_trans; -struct xfs_dquot; - -#if defined(XFS_ILOCK_TRACE) -#define XFS_ILOCK_KTRACE_SIZE 32 -extern ktrace_t *xfs_ilock_trace_buf; -extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); -#else -#define xfs_ilock_trace(i,n,f,ra) -#endif - -typedef struct dm_attrs_s { - __uint32_t da_dmevmask; /* DMIG event mask */ - __uint16_t da_dmstate; /* DMIG state info */ - __uint16_t da_pad; /* DMIG extra padding */ -} dm_attrs_t; - /* * This is the xfs in-core inode structure. * Most of the on-disk inode is embedded in the i_d field. @@ -191,6 +143,96 @@ typedef struct xfs_icdinode { __uint32_t di_gen; /* generation number */ } xfs_icdinode_t; +/* + * Flags for xfs_ichgtime(). + */ +#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ + +/* + * Per-fork incore inode flags. + */ +#define XFS_IFINLINE 0x01 /* Inline data is read in */ +#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ +#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ +#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ + +/* + * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). + */ +#define XFS_IMAP_LOOKUP 0x1 +#define XFS_IMAP_BULKSTAT 0x2 + +/* + * Fork handling. + */ + +#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) +#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) + +#define XFS_IFORK_PTR(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + &(ip)->i_df : \ + (ip)->i_afp) +#define XFS_IFORK_DSIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_IFORK_BOFF(ip) : \ + XFS_LITINO((ip)->i_mount)) +#define XFS_IFORK_ASIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ + 0) +#define XFS_IFORK_SIZE(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + XFS_IFORK_DSIZE(ip) : \ + XFS_IFORK_ASIZE(ip)) +#define XFS_IFORK_FORMAT(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_format : \ + (ip)->i_d.di_aformat) +#define XFS_IFORK_FMT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_format = (n)) : \ + ((ip)->i_d.di_aformat = (n))) +#define XFS_IFORK_NEXTENTS(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_nextents : \ + (ip)->i_d.di_anextents) +#define XFS_IFORK_NEXT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_nextents = (n)) : \ + ((ip)->i_d.di_anextents = (n))) + + + +#ifdef __KERNEL__ + +struct bhv_desc; +struct cred; +struct ktrace; +struct xfs_buf; +struct xfs_bmap_free; +struct xfs_bmbt_irec; +struct xfs_bmbt_block; +struct xfs_inode_log_item; +struct xfs_mount; +struct xfs_trans; +struct xfs_dquot; + +#if defined(XFS_ILOCK_TRACE) +#define XFS_ILOCK_KTRACE_SIZE 32 +extern ktrace_t *xfs_ilock_trace_buf; +extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); +#else +#define xfs_ilock_trace(i,n,f,ra) +#endif + +typedef struct dm_attrs_s { + __uint32_t da_dmevmask; /* DMIG event mask */ + __uint16_t da_dmstate; /* DMIG state info */ + __uint16_t da_pad; /* DMIG extra padding */ +} dm_attrs_t; + typedef struct { struct xfs_inode *ip_mnext; /* next inode in mount list */ struct xfs_inode *ip_mprev; /* ptr to prev inode */ @@ -327,50 +369,26 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) spin_unlock(&ip->i_flags_lock); return ret; } -#endif /* __KERNEL__ */ - /* - * Fork handling. + * Manage the i_flush queue embedded in the inode. This completion + * queue synchronizes processes attempting to flush the in-core + * inode back to disk. */ +static inline void xfs_iflock(xfs_inode_t *ip) +{ + wait_for_completion(&ip->i_flush); +} -#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) -#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) - -#define XFS_IFORK_PTR(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - &(ip)->i_df : \ - (ip)->i_afp) -#define XFS_IFORK_DSIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_IFORK_BOFF(ip) : \ - XFS_LITINO((ip)->i_mount)) -#define XFS_IFORK_ASIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ - 0) -#define XFS_IFORK_SIZE(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - XFS_IFORK_DSIZE(ip) : \ - XFS_IFORK_ASIZE(ip)) -#define XFS_IFORK_FORMAT(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - (ip)->i_d.di_format : \ - (ip)->i_d.di_aformat) -#define XFS_IFORK_FMT_SET(ip,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((ip)->i_d.di_format = (n)) : \ - ((ip)->i_d.di_aformat = (n))) -#define XFS_IFORK_NEXTENTS(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - (ip)->i_d.di_nextents : \ - (ip)->i_d.di_anextents) -#define XFS_IFORK_NEXT_SET(ip,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((ip)->i_d.di_nextents = (n)) : \ - ((ip)->i_d.di_anextents = (n))) +static inline int xfs_iflock_nowait(xfs_inode_t *ip) +{ + return try_wait_for_completion(&ip->i_flush); +} -#ifdef __KERNEL__ +static inline void xfs_ifunlock(xfs_inode_t *ip) +{ + complete(&ip->i_flush); +} /* * In-core inode flags. @@ -490,19 +508,11 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); /* * xfs_inode.c prototypes. */ -int xfs_itobp(struct xfs_mount *, struct xfs_trans *, - xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, - xfs_daddr_t, uint, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); -int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); -void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode_core *); -void xfs_dinode_to_disk(struct xfs_dinode_core *, - struct xfs_icdinode *); uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); @@ -514,15 +524,11 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); struct xfs_inode * xfs_inode_alloc(struct xfs_mount *, xfs_ino_t); -void xfs_idestroy_fork(xfs_inode_t *, int); void xfs_idestroy(xfs_inode_t *); -void xfs_idata_realloc(xfs_inode_t *, int, int); void xfs_iextract(xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); -void xfs_iroot_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); -int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); int xfs_iflush(xfs_inode_t *, uint); void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); @@ -533,6 +539,21 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); +#endif /* __KERNEL__ */ + +int xfs_itobp(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, struct xfs_dinode **, + struct xfs_buf **, xfs_daddr_t, uint, uint); +void xfs_dinode_from_disk(struct xfs_icdinode *, + struct xfs_dinode_core *); +void xfs_dinode_to_disk(struct xfs_dinode_core *, + struct xfs_icdinode *); +void xfs_idestroy_fork(struct xfs_inode *, int); +void xfs_idata_realloc(struct xfs_inode *, int, int); +void xfs_iroot_realloc(struct xfs_inode *, int, int); +int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); +int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); + xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, xfs_bmbt_irec_t *); @@ -562,7 +583,8 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) #ifdef DEBUG -void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t); +void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, + xfs_fsize_t); #else /* DEBUG */ #define xfs_isize_check(mp, ip, isize) #endif /* DEBUG */ @@ -577,26 +599,4 @@ extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; -/* - * Manage the i_flush queue embedded in the inode. This completion - * queue synchronizes processes attempting to flush the in-core - * inode back to disk. - */ -static inline void xfs_iflock(xfs_inode_t *ip) -{ - wait_for_completion(&ip->i_flush); -} - -static inline int xfs_iflock_nowait(xfs_inode_t *ip) -{ - return try_wait_for_completion(&ip->i_flush); -} - -static inline void xfs_ifunlock(xfs_inode_t *ip) -{ - complete(&ip->i_flush); -} - -#endif /* __KERNEL__ */ - #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 40513077ab3..1ff04cc323a 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -112,6 +112,24 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED) +#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w) +static inline int xfs_ilog_fbroot(int w) +{ + return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); +} + +#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w) +static inline int xfs_ilog_fext(int w) +{ + return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); +} + +#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w) +static inline int xfs_ilog_fdata(int w) +{ + return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA); +} + #ifdef __KERNEL__ struct xfs_buf; @@ -148,26 +166,6 @@ typedef struct xfs_inode_log_item { } xfs_inode_log_item_t; -#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w) -static inline int xfs_ilog_fdata(int w) -{ - return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA); -} - -#endif /* __KERNEL__ */ - -#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w) -static inline int xfs_ilog_fbroot(int w) -{ - return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); -} - -#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w) -static inline int xfs_ilog_fext(int w) -{ - return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); -} - static inline int xfs_inode_clean(xfs_inode_t *ip) { return (!ip->i_itemp || @@ -175,9 +173,6 @@ static inline int xfs_inode_clean(xfs_inode_t *ip) !ip->i_update_core; } - -#ifdef __KERNEL__ - extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 49d647e730e..56c3b122e53 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -44,14 +44,14 @@ typedef struct xfs_trans_reservations { } xfs_trans_reservations_t; #ifndef __KERNEL__ -/* - * Moved here from xfs_ag.h to avoid reordering header files - */ + #define XFS_DADDR_TO_AGNO(mp,d) \ ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) #define XFS_DADDR_TO_AGBNO(mp,d) \ ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) -#else + +#else /* __KERNEL__ */ + struct cred; struct log; struct xfs_mount_args; @@ -507,7 +507,6 @@ typedef struct xfs_mod_sb { #define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) -extern void xfs_mod_sb(xfs_trans_t *, __int64_t); extern int xfs_log_sbcount(xfs_mount_t *, uint); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); @@ -526,9 +525,6 @@ extern void xfs_freesb(xfs_mount_t *); extern int xfs_fs_writable(xfs_mount_t *); extern int xfs_syncsub(xfs_mount_t *, int, int *); extern int xfs_sync_inodes(xfs_mount_t *, int, int *); -extern xfs_agnumber_t xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t); -extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); -extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); @@ -540,4 +536,9 @@ extern struct xfs_dmops xfs_dmcore_xfs; #endif /* __KERNEL__ */ +extern void xfs_mod_sb(struct xfs_trans *, __int64_t); +extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); +extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); +extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 74c80bd2b0e..1d89d50a5b9 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -18,6 +18,8 @@ #ifndef __XFS_TRANS_H__ #define __XFS_TRANS_H__ +struct xfs_log_item; + /* * This is the structure written in the log at the head of * every transaction. It identifies the type and id of the @@ -98,76 +100,6 @@ typedef struct xfs_trans_header { #define XFS_TRANS_TYPE_MAX 41 /* new transaction types need to be reflected in xfs_logprint(8) */ - -#ifdef __KERNEL__ -struct xfs_buf; -struct xfs_buftarg; -struct xfs_efd_log_item; -struct xfs_efi_log_item; -struct xfs_inode; -struct xfs_item_ops; -struct xfs_log_iovec; -struct xfs_log_item; -struct xfs_log_item_desc; -struct xfs_mount; -struct xfs_trans; -struct xfs_dquot_acct; - -typedef struct xfs_log_item { - struct list_head li_ail; /* AIL pointers */ - xfs_lsn_t li_lsn; /* last on-disk lsn */ - struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ - struct xfs_mount *li_mountp; /* ptr to fs mount */ - uint li_type; /* item type */ - uint li_flags; /* misc flags */ - struct xfs_log_item *li_bio_list; /* buffer item list */ - void (*li_cb)(struct xfs_buf *, - struct xfs_log_item *); - /* buffer item iodone */ - /* callback func */ - struct xfs_item_ops *li_ops; /* function list */ -} xfs_log_item_t; - -#define XFS_LI_IN_AIL 0x1 -#define XFS_LI_ABORTED 0x2 - -typedef struct xfs_item_ops { - uint (*iop_size)(xfs_log_item_t *); - void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); - void (*iop_pin)(xfs_log_item_t *); - void (*iop_unpin)(xfs_log_item_t *, int); - void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); - uint (*iop_trylock)(xfs_log_item_t *); - void (*iop_unlock)(xfs_log_item_t *); - xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_push)(xfs_log_item_t *); - void (*iop_pushbuf)(xfs_log_item_t *); - void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); -} xfs_item_ops_t; - -#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) -#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) -#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) -#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) -#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) -#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) -#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) -#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) -#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) -#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) -#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) - -/* - * Return values for the IOP_TRYLOCK() routines. - */ -#define XFS_ITEM_SUCCESS 0 -#define XFS_ITEM_PINNED 1 -#define XFS_ITEM_LOCKED 2 -#define XFS_ITEM_FLUSHING 3 -#define XFS_ITEM_PUSHBUF 4 - -#endif /* __KERNEL__ */ - /* * This structure is used to track log items associated with * a transaction. It points to the log item and keeps some @@ -176,7 +108,7 @@ typedef struct xfs_item_ops { * once we get to commit processing (see xfs_trans_commit()). */ typedef struct xfs_log_item_desc { - xfs_log_item_t *lid_item; + struct xfs_log_item *lid_item; ushort lid_size; unsigned char lid_flags; unsigned char lid_index; @@ -276,94 +208,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs)); } -#ifdef __KERNEL__ -/* - * This structure is used to maintain a list of block ranges that have been - * freed in the transaction. The ranges are listed in the perag[] busy list - * between when they're freed and the transaction is committed to disk. - */ - -typedef struct xfs_log_busy_slot { - xfs_agnumber_t lbc_ag; - ushort lbc_idx; /* index in perag.busy[] */ -} xfs_log_busy_slot_t; - -#define XFS_LBC_NUM_SLOTS 31 -typedef struct xfs_log_busy_chunk { - struct xfs_log_busy_chunk *lbc_next; - uint lbc_free; /* free slots bitmask */ - ushort lbc_unused; /* first unused */ - xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; -} xfs_log_busy_chunk_t; - -#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) -#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) - -#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) -#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) -#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) -#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) -#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) - -/* - * This is the type of function which can be given to xfs_trans_callback() - * to be called upon the transaction's commit to disk. - */ -typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); - -/* - * This is the structure maintained for every active transaction. - */ -typedef struct xfs_trans { - unsigned int t_magic; /* magic number */ - xfs_log_callback_t t_logcb; /* log callback struct */ - unsigned int t_type; /* transaction type */ - unsigned int t_log_res; /* amt of log space resvd */ - unsigned int t_log_count; /* count for perm log res */ - unsigned int t_blk_res; /* # of blocks resvd */ - unsigned int t_blk_res_used; /* # of resvd blocks used */ - unsigned int t_rtx_res; /* # of rt extents resvd */ - unsigned int t_rtx_res_used; /* # of resvd rt extents used */ - xfs_log_ticket_t t_ticket; /* log mgr ticket */ - xfs_lsn_t t_lsn; /* log seq num of start of - * transaction. */ - xfs_lsn_t t_commit_lsn; /* log seq num of end of - * transaction. */ - struct xfs_mount *t_mountp; /* ptr to fs mount struct */ - struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ - xfs_trans_callback_t t_callback; /* transaction callback */ - void *t_callarg; /* callback arg */ - unsigned int t_flags; /* misc flags */ - int64_t t_icount_delta; /* superblock icount change */ - int64_t t_ifree_delta; /* superblock ifree change */ - int64_t t_fdblocks_delta; /* superblock fdblocks chg */ - int64_t t_res_fdblocks_delta; /* on-disk only chg */ - int64_t t_frextents_delta;/* superblock freextents chg*/ - int64_t t_res_frextents_delta; /* on-disk only chg */ -#ifdef DEBUG - int64_t t_ag_freeblks_delta; /* debugging counter */ - int64_t t_ag_flist_delta; /* debugging counter */ - int64_t t_ag_btree_delta; /* debugging counter */ -#endif - int64_t t_dblocks_delta;/* superblock dblocks change */ - int64_t t_agcount_delta;/* superblock agcount change */ - int64_t t_imaxpct_delta;/* superblock imaxpct change */ - int64_t t_rextsize_delta;/* superblock rextsize chg */ - int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */ - int64_t t_rblocks_delta;/* superblock rblocks change */ - int64_t t_rextents_delta;/* superblocks rextents chg */ - int64_t t_rextslog_delta;/* superblocks rextslog chg */ - unsigned int t_items_free; /* log item descs free */ - xfs_log_item_chunk_t t_items; /* first log item desc chunk */ - xfs_trans_header_t t_header; /* header for in-log trans */ - unsigned int t_busy_free; /* busy descs free */ - xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ - unsigned long t_pflags; /* saved process flags state */ -} xfs_trans_t; - -#endif /* __KERNEL__ */ - - #define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ /* * Values for t_flags. @@ -906,6 +750,156 @@ typedef struct xfs_trans { #define XFS_DQUOT_REF 1 #ifdef __KERNEL__ + +struct xfs_buf; +struct xfs_buftarg; +struct xfs_efd_log_item; +struct xfs_efi_log_item; +struct xfs_inode; +struct xfs_item_ops; +struct xfs_log_iovec; +struct xfs_log_item_desc; +struct xfs_mount; +struct xfs_trans; +struct xfs_dquot_acct; + +typedef struct xfs_log_item { + struct list_head li_ail; /* AIL pointers */ + xfs_lsn_t li_lsn; /* last on-disk lsn */ + struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ + struct xfs_mount *li_mountp; /* ptr to fs mount */ + uint li_type; /* item type */ + uint li_flags; /* misc flags */ + struct xfs_log_item *li_bio_list; /* buffer item list */ + void (*li_cb)(struct xfs_buf *, + struct xfs_log_item *); + /* buffer item iodone */ + /* callback func */ + struct xfs_item_ops *li_ops; /* function list */ +} xfs_log_item_t; + +#define XFS_LI_IN_AIL 0x1 +#define XFS_LI_ABORTED 0x2 + +typedef struct xfs_item_ops { + uint (*iop_size)(xfs_log_item_t *); + void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); + void (*iop_pin)(xfs_log_item_t *); + void (*iop_unpin)(xfs_log_item_t *, int); + void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); + uint (*iop_trylock)(xfs_log_item_t *); + void (*iop_unlock)(xfs_log_item_t *); + xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); + void (*iop_push)(xfs_log_item_t *); + void (*iop_pushbuf)(xfs_log_item_t *); + void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); +} xfs_item_ops_t; + +#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) +#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) +#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) +#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) +#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) +#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) +#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) +#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) +#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) +#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) +#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) + +/* + * Return values for the IOP_TRYLOCK() routines. + */ +#define XFS_ITEM_SUCCESS 0 +#define XFS_ITEM_PINNED 1 +#define XFS_ITEM_LOCKED 2 +#define XFS_ITEM_FLUSHING 3 +#define XFS_ITEM_PUSHBUF 4 + +/* + * This structure is used to maintain a list of block ranges that have been + * freed in the transaction. The ranges are listed in the perag[] busy list + * between when they're freed and the transaction is committed to disk. + */ + +typedef struct xfs_log_busy_slot { + xfs_agnumber_t lbc_ag; + ushort lbc_idx; /* index in perag.busy[] */ +} xfs_log_busy_slot_t; + +#define XFS_LBC_NUM_SLOTS 31 +typedef struct xfs_log_busy_chunk { + struct xfs_log_busy_chunk *lbc_next; + uint lbc_free; /* free slots bitmask */ + ushort lbc_unused; /* first unused */ + xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; +} xfs_log_busy_chunk_t; + +#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) +#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) + +#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) +#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) +#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) +#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) +#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) + +/* + * This is the type of function which can be given to xfs_trans_callback() + * to be called upon the transaction's commit to disk. + */ +typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); + +/* + * This is the structure maintained for every active transaction. + */ +typedef struct xfs_trans { + unsigned int t_magic; /* magic number */ + xfs_log_callback_t t_logcb; /* log callback struct */ + unsigned int t_type; /* transaction type */ + unsigned int t_log_res; /* amt of log space resvd */ + unsigned int t_log_count; /* count for perm log res */ + unsigned int t_blk_res; /* # of blocks resvd */ + unsigned int t_blk_res_used; /* # of resvd blocks used */ + unsigned int t_rtx_res; /* # of rt extents resvd */ + unsigned int t_rtx_res_used; /* # of resvd rt extents used */ + xfs_log_ticket_t t_ticket; /* log mgr ticket */ + xfs_lsn_t t_lsn; /* log seq num of start of + * transaction. */ + xfs_lsn_t t_commit_lsn; /* log seq num of end of + * transaction. */ + struct xfs_mount *t_mountp; /* ptr to fs mount struct */ + struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ + xfs_trans_callback_t t_callback; /* transaction callback */ + void *t_callarg; /* callback arg */ + unsigned int t_flags; /* misc flags */ + int64_t t_icount_delta; /* superblock icount change */ + int64_t t_ifree_delta; /* superblock ifree change */ + int64_t t_fdblocks_delta; /* superblock fdblocks chg */ + int64_t t_res_fdblocks_delta; /* on-disk only chg */ + int64_t t_frextents_delta;/* superblock freextents chg*/ + int64_t t_res_frextents_delta; /* on-disk only chg */ +#ifdef DEBUG + int64_t t_ag_freeblks_delta; /* debugging counter */ + int64_t t_ag_flist_delta; /* debugging counter */ + int64_t t_ag_btree_delta; /* debugging counter */ +#endif + int64_t t_dblocks_delta;/* superblock dblocks change */ + int64_t t_agcount_delta;/* superblock agcount change */ + int64_t t_imaxpct_delta;/* superblock imaxpct change */ + int64_t t_rextsize_delta;/* superblock rextsize chg */ + int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */ + int64_t t_rblocks_delta;/* superblock rblocks change */ + int64_t t_rextents_delta;/* superblocks rextents chg */ + int64_t t_rextslog_delta;/* superblocks rextslog chg */ + unsigned int t_items_free; /* log item descs free */ + xfs_log_item_chunk_t t_items; /* first log item desc chunk */ + xfs_trans_header_t t_header; /* header for in-log trans */ + unsigned int t_busy_free; /* busy descs free */ + xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ + unsigned long t_pflags; /* saved process flags state */ +} xfs_trans_t; + /* * XFS transaction mechanism exported interfaces that are * actually macros. @@ -928,7 +922,6 @@ typedef struct xfs_trans { /* * XFS transaction mechanism exported interfaces. */ -void xfs_trans_init(struct xfs_mount *); xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); @@ -975,7 +968,6 @@ int _xfs_trans_commit(xfs_trans_t *, int *); #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); -int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); @@ -990,4 +982,7 @@ extern kmem_zone_t *xfs_trans_zone; #endif /* __KERNEL__ */ +void xfs_trans_init(struct xfs_mount *); +int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); + #endif /* __XFS_TRANS_H__ */ -- cgit v1.2.3 From 89b2839319cb0c0364d55dc6fd6d3838e864ab54 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 17:05:49 +1100 Subject: [XFS] Check agf_btreeblks is valid when reading in the AGF SGI-PV: 987683 SGI-Modid: xfs-linux-melb:xfs-kern:32232a Signed-off-by: Barry Naujok Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 54fa69e2776..0a2a87208b1 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2272,6 +2272,9 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= + be32_to_cpu(agf->agf_length); if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, XFS_RANDOM_ALLOC_READ_AGF))) { XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", @@ -2297,6 +2300,7 @@ xfs_alloc_read_agf( #ifdef DEBUG else if (!XFS_FORCED_SHUTDOWN(mp)) { ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); + ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == -- cgit v1.2.3 From 34519daae6778d129d56688f75ade27f6011bac9 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 17:05:58 +1100 Subject: [XFS] Show buffer address with debug hexdump on corruption SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32233a Signed-off-by: Barry Naujok Signed-off-by: Eric Sandeen Signed-off-by: Lachlan McIlroy --- fs/xfs/support/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index c27abef7b84..636104254cf 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -84,5 +84,5 @@ assfail(char *expr, char *file, int line) void xfs_hex_dump(void *p, int length) { - print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); } -- cgit v1.2.3 From fe4fa4b8e463fa5848ef9e86ed75d27501d0da1e Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:08 +1100 Subject: [XFS] move sync code to its own file The sync code in XFS is spread around several files. While it used to make sense to have such a distribution, the code is about to be cleaned up and so centralising it in one spot as the first step makes sense. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32282a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/linux-2.6/xfs_super.c | 1 + fs/xfs/linux-2.6/xfs_sync.c | 605 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 7 + fs/xfs/xfs_vfsops.c | 560 +-------------------------------------- fs/xfs/xfs_vfsops.h | 1 - 6 files changed, 615 insertions(+), 560 deletions(-) create mode 100644 fs/xfs/linux-2.6/xfs_sync.c create mode 100644 fs/xfs/linux-2.6/xfs_sync.h (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 75b2be72c39..51b87de97f8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -107,6 +107,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs_iops.o \ xfs_lrw.o \ xfs_super.o \ + xfs_sync.o \ xfs_vnode.o \ xfs_xattr.o) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 0c71c215853..727d0e47e80 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -59,6 +59,7 @@ #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" +#include "xfs_sync.h" #include #include diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c new file mode 100644 index 00000000000..c765eb2a8dc --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_inode.h" +#include "xfs_dinode.h" +#include "xfs_error.h" +#include "xfs_mru_cache.h" +#include "xfs_filestream.h" +#include "xfs_vnodeops.h" +#include "xfs_utils.h" +#include "xfs_buf_item.h" +#include "xfs_inode_item.h" +#include "xfs_rw.h" + +/* + * xfs_sync flushes any pending I/O to file system vfsp. + * + * This routine is called by vfs_sync() to make sure that things make it + * out to disk eventually, on sync() system calls to flush out everything, + * and when the file system is unmounted. For the vfs_sync() case, all + * we really need to do is sync out the log to make all of our meta-data + * updates permanent (except for timestamps). For calls from pflushd(), + * dirty pages are kept moving by calling pdflush() on the inodes + * containing them. We also flush the inodes that we can lock without + * sleeping and the superblock if we can lock it without sleeping from + * vfs_sync() so that items at the tail of the log are always moving out. + * + * Flags: + * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want + * to sleep if we can help it. All we really need + * to do is ensure that the log is synced at least + * periodically. We also push the inodes and + * superblock if we can lock them without sleeping + * and they are not pinned. + * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not + * set, then we really want to lock each inode and flush + * it. + * SYNC_WAIT - All the flushes that take place in this call should + * be synchronous. + * SYNC_DELWRI - This tells us to push dirty pages associated with + * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to + * determine if they should be flushed sync, async, or + * delwri. + * SYNC_CLOSE - This flag is passed when the system is being + * unmounted. We should sync and invalidate everything. + * SYNC_FSDATA - This indicates that the caller would like to make + * sure the superblock is safe on disk. We can ensure + * this by simply making sure the log gets flushed + * if SYNC_BDFLUSH is set, and by actually writing it + * out otherwise. + * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete + * before we return (including direct I/O). Forms the drain + * side of the write barrier needed to safely quiesce the + * filesystem. + * + */ +int +xfs_sync( + xfs_mount_t *mp, + int flags) +{ + int error; + + /* + * Get the Quota Manager to flush the dquots. + * + * If XFS quota support is not enabled or this filesystem + * instance does not use quotas XFS_QM_DQSYNC will always + * return zero. + */ + error = XFS_QM_DQSYNC(mp, flags); + if (error) { + /* + * If we got an IO error, we will be shutting down. + * So, there's nothing more for us to do here. + */ + ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(error); + } + + if (flags & SYNC_IOWAIT) + xfs_filestream_flush(mp); + + return xfs_syncsub(mp, flags, NULL); +} + +/* + * xfs sync routine for internal use + * + * This routine supports all of the flags defined for the generic vfs_sync + * interface as explained above under xfs_sync. + * + */ +int +xfs_sync_inodes( + xfs_mount_t *mp, + int flags, + int *bypassed) +{ + xfs_inode_t *ip = NULL; + struct inode *vp = NULL; + int error; + int last_error; + uint64_t fflag; + uint lock_flags; + uint base_lock_flags; + boolean_t mount_locked; + boolean_t vnode_refed; + int preempt; + xfs_iptr_t *ipointer; +#ifdef DEBUG + boolean_t ipointer_in = B_FALSE; + +#define IPOINTER_SET ipointer_in = B_TRUE +#define IPOINTER_CLR ipointer_in = B_FALSE +#else +#define IPOINTER_SET +#define IPOINTER_CLR +#endif + + +/* Insert a marker record into the inode list after inode ip. The list + * must be locked when this is called. After the call the list will no + * longer be locked. + */ +#define IPOINTER_INSERT(ip, mp) { \ + ASSERT(ipointer_in == B_FALSE); \ + ipointer->ip_mnext = ip->i_mnext; \ + ipointer->ip_mprev = ip; \ + ip->i_mnext = (xfs_inode_t *)ipointer; \ + ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ + preempt = 0; \ + XFS_MOUNT_IUNLOCK(mp); \ + mount_locked = B_FALSE; \ + IPOINTER_SET; \ + } + +/* Remove the marker from the inode list. If the marker was the only item + * in the list then there are no remaining inodes and we should zero out + * the whole list. If we are the current head of the list then move the head + * past us. + */ +#define IPOINTER_REMOVE(ip, mp) { \ + ASSERT(ipointer_in == B_TRUE); \ + if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ + ip = ipointer->ip_mnext; \ + ip->i_mprev = ipointer->ip_mprev; \ + ipointer->ip_mprev->i_mnext = ip; \ + if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ + mp->m_inodes = ip; \ + } \ + } else { \ + ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ + mp->m_inodes = NULL; \ + ip = NULL; \ + } \ + IPOINTER_CLR; \ + } + +#define XFS_PREEMPT_MASK 0x7f + + ASSERT(!(flags & SYNC_BDFLUSH)); + + if (bypassed) + *bypassed = 0; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; + preempt = 0; + + /* Allocate a reference marker */ + ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); + + fflag = XFS_B_ASYNC; /* default is don't wait */ + if (flags & SYNC_DELWRI) + fflag = XFS_B_DELWRI; + if (flags & SYNC_WAIT) + fflag = 0; /* synchronous overrides all */ + + base_lock_flags = XFS_ILOCK_SHARED; + if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { + /* + * We need the I/O lock if we're going to call any of + * the flush/inval routines. + */ + base_lock_flags |= XFS_IOLOCK_SHARED; + } + + XFS_MOUNT_ILOCK(mp); + + ip = mp->m_inodes; + + mount_locked = B_TRUE; + vnode_refed = B_FALSE; + + IPOINTER_CLR; + + do { + ASSERT(ipointer_in == B_FALSE); + ASSERT(vnode_refed == B_FALSE); + + lock_flags = base_lock_flags; + + /* + * There were no inodes in the list, just break out + * of the loop. + */ + if (ip == NULL) { + break; + } + + /* + * We found another sync thread marker - skip it + */ + if (ip->i_mount == NULL) { + ip = ip->i_mnext; + continue; + } + + vp = VFS_I(ip); + + /* + * If the vnode is gone then this is being torn down, + * call reclaim if it is flushed, else let regular flush + * code deal with it later in the loop. + */ + + if (vp == NULL) { + /* Skip ones already in reclaim */ + if (ip->i_flags & XFS_IRECLAIM) { + ip = ip->i_mnext; + continue; + } + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { + ip = ip->i_mnext; + } else if ((xfs_ipincount(ip) == 0) && + xfs_iflock_nowait(ip)) { + IPOINTER_INSERT(ip, mp); + + xfs_finish_reclaim(ip, 1, + XFS_IFLUSH_DELWRI_ELSE_ASYNC); + + XFS_MOUNT_ILOCK(mp); + mount_locked = B_TRUE; + IPOINTER_REMOVE(ip, mp); + } else { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + ip = ip->i_mnext; + } + continue; + } + + if (VN_BAD(vp)) { + ip = ip->i_mnext; + continue; + } + + if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { + XFS_MOUNT_IUNLOCK(mp); + kmem_free(ipointer); + return 0; + } + + /* + * Try to lock without sleeping. We're out of order with + * the inode list lock here, so if we fail we need to drop + * the mount lock and try again. If we're called from + * bdflush() here, then don't bother. + * + * The inode lock here actually coordinates with the + * almost spurious inode lock in xfs_ireclaim() to prevent + * the vnode we handle here without a reference from + * being freed while we reference it. If we lock the inode + * while it's on the mount list here, then the spurious inode + * lock in xfs_ireclaim() after the inode is pulled from + * the mount list will sleep until we release it here. + * This keeps the vnode from being freed while we reference + * it. + */ + if (xfs_ilock_nowait(ip, lock_flags) == 0) { + if (vp == NULL) { + ip = ip->i_mnext; + continue; + } + + vp = vn_grab(vp); + if (vp == NULL) { + ip = ip->i_mnext; + continue; + } + + IPOINTER_INSERT(ip, mp); + xfs_ilock(ip, lock_flags); + + ASSERT(vp == VFS_I(ip)); + ASSERT(ip->i_mount == mp); + + vnode_refed = B_TRUE; + } + + /* From here on in the loop we may have a marker record + * in the inode list. + */ + + /* + * If we have to flush data or wait for I/O completion + * we need to drop the ilock that we currently hold. + * If we need to drop the lock, insert a marker if we + * have not already done so. + */ + if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || + ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { + if (mount_locked) { + IPOINTER_INSERT(ip, mp); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (flags & SYNC_CLOSE) { + /* Shutdown case. Flush and invalidate. */ + if (XFS_FORCED_SHUTDOWN(mp)) + xfs_tosspages(ip, 0, -1, + FI_REMAPF); + else + error = xfs_flushinval_pages(ip, + 0, -1, FI_REMAPF); + } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { + error = xfs_flush_pages(ip, 0, + -1, fflag, FI_NONE); + } + + /* + * When freezing, we need to wait ensure all I/O (including direct + * I/O) is complete to ensure no further data modification can take + * place after this point + */ + if (flags & SYNC_IOWAIT) + vn_iowait(ip); + + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + + if ((flags & SYNC_ATTR) && + (ip->i_update_core || + (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { + if (mount_locked) + IPOINTER_INSERT(ip, mp); + + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + + /* + * If we can't acquire the flush lock, then the inode + * is already being flushed so don't bother waiting. + * + * If we can lock it then do a delwri flush so we can + * combine multiple inode flushes in each disk write. + */ + } else if (xfs_iflock_nowait(ip)) { + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + } else if (bypassed) { + (*bypassed)++; + } + } + + if (lock_flags != 0) { + xfs_iunlock(ip, lock_flags); + } + + if (vnode_refed) { + /* + * If we had to take a reference on the vnode + * above, then wait until after we've unlocked + * the inode to release the reference. This is + * because we can be already holding the inode + * lock when IRELE() calls xfs_inactive(). + * + * Make sure to drop the mount lock before calling + * IRELE() so that we don't trip over ourselves if + * we have to go for the mount lock again in the + * inactive code. + */ + if (mount_locked) { + IPOINTER_INSERT(ip, mp); + } + + IRELE(ip); + + vnode_refed = B_FALSE; + } + + if (error) { + last_error = error; + } + + /* + * bail out if the filesystem is corrupted. + */ + if (error == EFSCORRUPTED) { + if (!mount_locked) { + XFS_MOUNT_ILOCK(mp); + IPOINTER_REMOVE(ip, mp); + } + XFS_MOUNT_IUNLOCK(mp); + ASSERT(ipointer_in == B_FALSE); + kmem_free(ipointer); + return XFS_ERROR(error); + } + + /* Let other threads have a chance at the mount lock + * if we have looped many times without dropping the + * lock. + */ + if ((++preempt & XFS_PREEMPT_MASK) == 0) { + if (mount_locked) { + IPOINTER_INSERT(ip, mp); + } + } + + if (mount_locked == B_FALSE) { + XFS_MOUNT_ILOCK(mp); + mount_locked = B_TRUE; + IPOINTER_REMOVE(ip, mp); + continue; + } + + ASSERT(ipointer_in == B_FALSE); + ip = ip->i_mnext; + + } while (ip != mp->m_inodes); + + XFS_MOUNT_IUNLOCK(mp); + + ASSERT(ipointer_in == B_FALSE); + + kmem_free(ipointer); + return XFS_ERROR(last_error); +} + +/* + * xfs sync routine for internal use + * + * This routine supports all of the flags defined for the generic vfs_sync + * interface as explained above under xfs_sync. + * + */ +int +xfs_syncsub( + xfs_mount_t *mp, + int flags, + int *bypassed) +{ + int error = 0; + int last_error = 0; + uint log_flags = XFS_LOG_FORCE; + xfs_buf_t *bp; + xfs_buf_log_item_t *bip; + + /* + * Sync out the log. This ensures that the log is periodically + * flushed even if there is not enough activity to fill it up. + */ + if (flags & SYNC_WAIT) + log_flags |= XFS_LOG_SYNC; + + xfs_log_force(mp, (xfs_lsn_t)0, log_flags); + + if (flags & (SYNC_ATTR|SYNC_DELWRI)) { + if (flags & SYNC_BDFLUSH) + xfs_finish_reclaim_all(mp, 1); + else + error = xfs_sync_inodes(mp, flags, bypassed); + } + + /* + * Flushing out dirty data above probably generated more + * log activity, so if this isn't vfs_sync() then flush + * the log again. + */ + if (flags & SYNC_DELWRI) { + xfs_log_force(mp, (xfs_lsn_t)0, log_flags); + } + + if (flags & SYNC_FSDATA) { + /* + * If this is vfs_sync() then only sync the superblock + * if we can lock it without sleeping and it is not pinned. + */ + if (flags & SYNC_BDFLUSH) { + bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + if (bp != NULL) { + bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); + if ((bip != NULL) && + xfs_buf_item_dirty(bip)) { + if (!(XFS_BUF_ISPINNED(bp))) { + XFS_BUF_ASYNC(bp); + error = xfs_bwrite(mp, bp); + } else { + xfs_buf_relse(bp); + } + } else { + xfs_buf_relse(bp); + } + } + } else { + bp = xfs_getsb(mp, 0); + /* + * If the buffer is pinned then push on the log so + * we won't get stuck waiting in the write for + * someone, maybe ourselves, to flush the log. + * Even though we just pushed the log above, we + * did not have the superblock buffer locked at + * that point so it can become pinned in between + * there and here. + */ + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + if (flags & SYNC_WAIT) + XFS_BUF_UNASYNC(bp); + else + XFS_BUF_ASYNC(bp); + error = xfs_bwrite(mp, bp); + } + if (error) { + last_error = error; + } + } + + /* + * Now check to see if the log needs a "dummy" transaction. + */ + if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { + xfs_trans_t *tp; + xfs_inode_t *ip; + + /* + * Put a dummy transaction in the log to tell + * recovery that all others are OK. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + if ((error = xfs_trans_reserve(tp, 0, + XFS_ICHANGE_LOG_RES(mp), + 0, 0, 0))) { + xfs_trans_cancel(tp, 0); + return error; + } + + ip = mp->m_rootip; + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_log_force(mp, (xfs_lsn_t)0, log_flags); + } + + /* + * When shutting down, we need to insure that the AIL is pushed + * to disk or the filesystem can appear corrupt from the PROM. + */ + if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { + XFS_bflush(mp->m_ddev_targp); + if (mp->m_rtdev_targp) { + XFS_bflush(mp->m_rtdev_targp); + } + } + + return XFS_ERROR(last_error); +} diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h new file mode 100644 index 00000000000..f4c3b1ea64c --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -0,0 +1,7 @@ +#ifndef XFS_SYNC_H +#define XFS_SYNC_H 1 + +int xfs_sync(struct xfs_mount *mp, int flags); +int xfs_syncsub(struct xfs_mount *mp, int flags, int *bypassed); + +#endif diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 439dd3939dd..01e274b902c 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -56,6 +56,7 @@ #include "xfs_vnodeops.h" #include "xfs_vfsops.h" #include "xfs_utils.h" +#include "xfs_sync.h" STATIC void @@ -196,562 +197,3 @@ fscorrupt_out2: return XFS_ERROR(EFSCORRUPTED); } -/* - * xfs_sync flushes any pending I/O to file system vfsp. - * - * This routine is called by vfs_sync() to make sure that things make it - * out to disk eventually, on sync() system calls to flush out everything, - * and when the file system is unmounted. For the vfs_sync() case, all - * we really need to do is sync out the log to make all of our meta-data - * updates permanent (except for timestamps). For calls from pflushd(), - * dirty pages are kept moving by calling pdflush() on the inodes - * containing them. We also flush the inodes that we can lock without - * sleeping and the superblock if we can lock it without sleeping from - * vfs_sync() so that items at the tail of the log are always moving out. - * - * Flags: - * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want - * to sleep if we can help it. All we really need - * to do is ensure that the log is synced at least - * periodically. We also push the inodes and - * superblock if we can lock them without sleeping - * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not - * set, then we really want to lock each inode and flush - * it. - * SYNC_WAIT - All the flushes that take place in this call should - * be synchronous. - * SYNC_DELWRI - This tells us to push dirty pages associated with - * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to - * determine if they should be flushed sync, async, or - * delwri. - * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everything. - * SYNC_FSDATA - This indicates that the caller would like to make - * sure the superblock is safe on disk. We can ensure - * this by simply making sure the log gets flushed - * if SYNC_BDFLUSH is set, and by actually writing it - * out otherwise. - * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete - * before we return (including direct I/O). Forms the drain - * side of the write barrier needed to safely quiesce the - * filesystem. - * - */ -int -xfs_sync( - xfs_mount_t *mp, - int flags) -{ - int error; - - /* - * Get the Quota Manager to flush the dquots. - * - * If XFS quota support is not enabled or this filesystem - * instance does not use quotas XFS_QM_DQSYNC will always - * return zero. - */ - error = XFS_QM_DQSYNC(mp, flags); - if (error) { - /* - * If we got an IO error, we will be shutting down. - * So, there's nothing more for us to do here. - */ - ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(error); - } - - if (flags & SYNC_IOWAIT) - xfs_filestream_flush(mp); - - return xfs_syncsub(mp, flags, NULL); -} - -/* - * xfs sync routine for internal use - * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. - * - */ -int -xfs_sync_inodes( - xfs_mount_t *mp, - int flags, - int *bypassed) -{ - xfs_inode_t *ip = NULL; - struct inode *vp = NULL; - int error; - int last_error; - uint64_t fflag; - uint lock_flags; - uint base_lock_flags; - boolean_t mount_locked; - boolean_t vnode_refed; - int preempt; - xfs_iptr_t *ipointer; -#ifdef DEBUG - boolean_t ipointer_in = B_FALSE; - -#define IPOINTER_SET ipointer_in = B_TRUE -#define IPOINTER_CLR ipointer_in = B_FALSE -#else -#define IPOINTER_SET -#define IPOINTER_CLR -#endif - - -/* Insert a marker record into the inode list after inode ip. The list - * must be locked when this is called. After the call the list will no - * longer be locked. - */ -#define IPOINTER_INSERT(ip, mp) { \ - ASSERT(ipointer_in == B_FALSE); \ - ipointer->ip_mnext = ip->i_mnext; \ - ipointer->ip_mprev = ip; \ - ip->i_mnext = (xfs_inode_t *)ipointer; \ - ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ - preempt = 0; \ - XFS_MOUNT_IUNLOCK(mp); \ - mount_locked = B_FALSE; \ - IPOINTER_SET; \ - } - -/* Remove the marker from the inode list. If the marker was the only item - * in the list then there are no remaining inodes and we should zero out - * the whole list. If we are the current head of the list then move the head - * past us. - */ -#define IPOINTER_REMOVE(ip, mp) { \ - ASSERT(ipointer_in == B_TRUE); \ - if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ - ip = ipointer->ip_mnext; \ - ip->i_mprev = ipointer->ip_mprev; \ - ipointer->ip_mprev->i_mnext = ip; \ - if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ - mp->m_inodes = ip; \ - } \ - } else { \ - ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ - mp->m_inodes = NULL; \ - ip = NULL; \ - } \ - IPOINTER_CLR; \ - } - -#define XFS_PREEMPT_MASK 0x7f - - ASSERT(!(flags & SYNC_BDFLUSH)); - - if (bypassed) - *bypassed = 0; - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - error = 0; - last_error = 0; - preempt = 0; - - /* Allocate a reference marker */ - ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); - - fflag = XFS_B_ASYNC; /* default is don't wait */ - if (flags & SYNC_DELWRI) - fflag = XFS_B_DELWRI; - if (flags & SYNC_WAIT) - fflag = 0; /* synchronous overrides all */ - - base_lock_flags = XFS_ILOCK_SHARED; - if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { - /* - * We need the I/O lock if we're going to call any of - * the flush/inval routines. - */ - base_lock_flags |= XFS_IOLOCK_SHARED; - } - - XFS_MOUNT_ILOCK(mp); - - ip = mp->m_inodes; - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_CLR; - - do { - ASSERT(ipointer_in == B_FALSE); - ASSERT(vnode_refed == B_FALSE); - - lock_flags = base_lock_flags; - - /* - * There were no inodes in the list, just break out - * of the loop. - */ - if (ip == NULL) { - break; - } - - /* - * We found another sync thread marker - skip it - */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - - vp = VFS_I(ip); - - /* - * If the vnode is gone then this is being torn down, - * call reclaim if it is flushed, else let regular flush - * code deal with it later in the loop. - */ - - if (vp == NULL) { - /* Skip ones already in reclaim */ - if (ip->i_flags & XFS_IRECLAIM) { - ip = ip->i_mnext; - continue; - } - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - ip = ip->i_mnext; - } else if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - IPOINTER_INSERT(ip, mp); - - xfs_finish_reclaim(ip, 1, - XFS_IFLUSH_DELWRI_ELSE_ASYNC); - - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - } else { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - ip = ip->i_mnext; - } - continue; - } - - if (VN_BAD(vp)) { - ip = ip->i_mnext; - continue; - } - - if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { - XFS_MOUNT_IUNLOCK(mp); - kmem_free(ipointer); - return 0; - } - - /* - * Try to lock without sleeping. We're out of order with - * the inode list lock here, so if we fail we need to drop - * the mount lock and try again. If we're called from - * bdflush() here, then don't bother. - * - * The inode lock here actually coordinates with the - * almost spurious inode lock in xfs_ireclaim() to prevent - * the vnode we handle here without a reference from - * being freed while we reference it. If we lock the inode - * while it's on the mount list here, then the spurious inode - * lock in xfs_ireclaim() after the inode is pulled from - * the mount list will sleep until we release it here. - * This keeps the vnode from being freed while we reference - * it. - */ - if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if (vp == NULL) { - ip = ip->i_mnext; - continue; - } - - vp = vn_grab(vp); - if (vp == NULL) { - ip = ip->i_mnext; - continue; - } - - IPOINTER_INSERT(ip, mp); - xfs_ilock(ip, lock_flags); - - ASSERT(vp == VFS_I(ip)); - ASSERT(ip->i_mount == mp); - - vnode_refed = B_TRUE; - } - - /* From here on in the loop we may have a marker record - * in the inode list. - */ - - /* - * If we have to flush data or wait for I/O completion - * we need to drop the ilock that we currently hold. - * If we need to drop the lock, insert a marker if we - * have not already done so. - */ - if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || - ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (flags & SYNC_CLOSE) { - /* Shutdown case. Flush and invalidate. */ - if (XFS_FORCED_SHUTDOWN(mp)) - xfs_tosspages(ip, 0, -1, - FI_REMAPF); - else - error = xfs_flushinval_pages(ip, - 0, -1, FI_REMAPF); - } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { - error = xfs_flush_pages(ip, 0, - -1, fflag, FI_NONE); - } - - /* - * When freezing, we need to wait ensure all I/O (including direct - * I/O) is complete to ensure no further data modification can take - * place after this point - */ - if (flags & SYNC_IOWAIT) - vn_iowait(ip); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - - if ((flags & SYNC_ATTR) && - (ip->i_update_core || - (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { - if (mount_locked) - IPOINTER_INSERT(ip, mp); - - if (flags & SYNC_WAIT) { - xfs_iflock(ip); - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - - /* - * If we can't acquire the flush lock, then the inode - * is already being flushed so don't bother waiting. - * - * If we can lock it then do a delwri flush so we can - * combine multiple inode flushes in each disk write. - */ - } else if (xfs_iflock_nowait(ip)) { - error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); - } else if (bypassed) { - (*bypassed)++; - } - } - - if (lock_flags != 0) { - xfs_iunlock(ip, lock_flags); - } - - if (vnode_refed) { - /* - * If we had to take a reference on the vnode - * above, then wait until after we've unlocked - * the inode to release the reference. This is - * because we can be already holding the inode - * lock when IRELE() calls xfs_inactive(). - * - * Make sure to drop the mount lock before calling - * IRELE() so that we don't trip over ourselves if - * we have to go for the mount lock again in the - * inactive code. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - - IRELE(ip); - - vnode_refed = B_FALSE; - } - - if (error) { - last_error = error; - } - - /* - * bail out if the filesystem is corrupted. - */ - if (error == EFSCORRUPTED) { - if (!mount_locked) { - XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - } - XFS_MOUNT_IUNLOCK(mp); - ASSERT(ipointer_in == B_FALSE); - kmem_free(ipointer); - return XFS_ERROR(error); - } - - /* Let other threads have a chance at the mount lock - * if we have looped many times without dropping the - * lock. - */ - if ((++preempt & XFS_PREEMPT_MASK) == 0) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - } - - if (mount_locked == B_FALSE) { - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - continue; - } - - ASSERT(ipointer_in == B_FALSE); - ip = ip->i_mnext; - - } while (ip != mp->m_inodes); - - XFS_MOUNT_IUNLOCK(mp); - - ASSERT(ipointer_in == B_FALSE); - - kmem_free(ipointer); - return XFS_ERROR(last_error); -} - -/* - * xfs sync routine for internal use - * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. - * - */ -int -xfs_syncsub( - xfs_mount_t *mp, - int flags, - int *bypassed) -{ - int error = 0; - int last_error = 0; - uint log_flags = XFS_LOG_FORCE; - xfs_buf_t *bp; - xfs_buf_log_item_t *bip; - - /* - * Sync out the log. This ensures that the log is periodically - * flushed even if there is not enough activity to fill it up. - */ - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; - - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - - if (flags & (SYNC_ATTR|SYNC_DELWRI)) { - if (flags & SYNC_BDFLUSH) - xfs_finish_reclaim_all(mp, 1); - else - error = xfs_sync_inodes(mp, flags, bypassed); - } - - /* - * Flushing out dirty data above probably generated more - * log activity, so if this isn't vfs_sync() then flush - * the log again. - */ - if (flags & SYNC_DELWRI) { - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - } - - if (flags & SYNC_FSDATA) { - /* - * If this is vfs_sync() then only sync the superblock - * if we can lock it without sleeping and it is not pinned. - */ - if (flags & SYNC_BDFLUSH) { - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); - if (bp != NULL) { - bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - if ((bip != NULL) && - xfs_buf_item_dirty(bip)) { - if (!(XFS_BUF_ISPINNED(bp))) { - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } else { - xfs_buf_relse(bp); - } - } else { - xfs_buf_relse(bp); - } - } - } else { - bp = xfs_getsb(mp, 0); - /* - * If the buffer is pinned then push on the log so - * we won't get stuck waiting in the write for - * someone, maybe ourselves, to flush the log. - * Even though we just pushed the log above, we - * did not have the superblock buffer locked at - * that point so it can become pinned in between - * there and here. - */ - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - if (flags & SYNC_WAIT) - XFS_BUF_UNASYNC(bp); - else - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } - if (error) { - last_error = error; - } - } - - /* - * Now check to see if the log needs a "dummy" transaction. - */ - if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { - xfs_trans_t *tp; - xfs_inode_t *ip; - - /* - * Put a dummy transaction in the log to tell - * recovery that all others are OK. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - if ((error = xfs_trans_reserve(tp, 0, - XFS_ICHANGE_LOG_RES(mp), - 0, 0, 0))) { - xfs_trans_cancel(tp, 0); - return error; - } - - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - } - - /* - * When shutting down, we need to insure that the AIL is pushed - * to disk or the filesystem can appear corrupt from the PROM. - */ - if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { - XFS_bflush(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - XFS_bflush(mp->m_rtdev_targp); - } - } - - return XFS_ERROR(last_error); -} diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index a74b05087da..6701d0ed8ad 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -8,7 +8,6 @@ struct kstatfs; struct xfs_mount; struct xfs_mount_args; -int xfs_sync(struct xfs_mount *mp, int flags); void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); void xfs_attr_quiesce(struct xfs_mount *mp); -- cgit v1.2.3 From a167b17e899a930758506bbc18748078d6fd8c89 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:18 +1100 Subject: [XFS] move xfssyncd code to xfs_sync.c Move all the xfssyncd code to the new xfs_sync.c file. This places it closer to the actual code that it interacts with, rather than just being associated with high level VFS code. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32283a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 151 +-------------------------------------- fs/xfs/linux-2.6/xfs_super.h | 3 - fs/xfs/linux-2.6/xfs_sync.c | 163 +++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 56 +++++++++++++++ fs/xfs/linux-2.6/xfs_vfs.h | 31 -------- fs/xfs/xfs_mount.h | 1 + 6 files changed, 223 insertions(+), 182 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 727d0e47e80..767f38e0e0b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -979,146 +979,6 @@ xfs_fs_clear_inode( ASSERT(XFS_I(inode) == NULL); } -/* - * Enqueue a work item to be picked up by the vfs xfssyncd thread. - * Doing this has two advantages: - * - It saves on stack space, which is tight in certain situations - * - It can be used (with care) as a mechanism to avoid deadlocks. - * Flushing while allocating in a full filesystem requires both. - */ -STATIC void -xfs_syncd_queue_work( - struct xfs_mount *mp, - void *data, - void (*syncer)(struct xfs_mount *, void *)) -{ - struct bhv_vfs_sync_work *work; - - work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); - INIT_LIST_HEAD(&work->w_list); - work->w_syncer = syncer; - work->w_data = data; - work->w_mount = mp; - spin_lock(&mp->m_sync_lock); - list_add_tail(&work->w_list, &mp->m_sync_list); - spin_unlock(&mp->m_sync_lock); - wake_up_process(mp->m_sync_task); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room... - */ -STATIC void -xfs_flush_inode_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - filemap_flush(inode->i_mapping); - iput(inode); -} - -void -xfs_flush_inode( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); - delay(msecs_to_jiffies(500)); -} - -/* - * This is the "bigger hammer" version of xfs_flush_inode_work... - * (IOW, "If at first you don't succeed, use a Bigger Hammer"). - */ -STATIC void -xfs_flush_device_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - sync_blockdev(mp->m_super->s_bdev); - iput(inode); -} - -void -xfs_flush_device( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); - delay(msecs_to_jiffies(500)); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); -} - -STATIC void -xfs_sync_worker( - struct xfs_mount *mp, - void *unused) -{ - int error; - - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) - error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); - mp->m_sync_seq++; - wake_up(&mp->m_wait_single_sync_task); -} - -STATIC int -xfssyncd( - void *arg) -{ - struct xfs_mount *mp = arg; - long timeleft; - bhv_vfs_sync_work_t *work, *n; - LIST_HEAD (tmp); - - set_freezable(); - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); - for (;;) { - timeleft = schedule_timeout_interruptible(timeleft); - /* swsusp */ - try_to_freeze(); - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) - break; - - spin_lock(&mp->m_sync_lock); - /* - * We can get woken by laptop mode, to do a sync - - * that's the (only!) case where the list would be - * empty with time remaining. - */ - if (!timeleft || list_empty(&mp->m_sync_list)) { - if (!timeleft) - timeleft = xfs_syncd_centisecs * - msecs_to_jiffies(10); - INIT_LIST_HEAD(&mp->m_sync_work.w_list); - list_add_tail(&mp->m_sync_work.w_list, - &mp->m_sync_list); - } - list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) - list_move(&work->w_list, &tmp); - spin_unlock(&mp->m_sync_lock); - - list_for_each_entry_safe(work, n, &tmp, w_list) { - (*work->w_syncer)(mp, work->w_data); - list_del(&work->w_list); - if (work == &mp->m_sync_work) - continue; - kmem_free(work); - } - } - - return 0; -} - STATIC void xfs_free_fsname( struct xfs_mount *mp) @@ -1137,8 +997,7 @@ xfs_fs_put_super( int unmount_event_flags = 0; int error; - kthread_stop(mp->m_sync_task); - + xfs_syncd_stop(mp); xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); #ifdef HAVE_DMAPI @@ -1808,13 +1667,9 @@ xfs_fs_fill_super( goto fail_vnrele; } - mp->m_sync_work.w_syncer = xfs_sync_worker; - mp->m_sync_work.w_mount = mp; - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); - if (IS_ERR(mp->m_sync_task)) { - error = -PTR_ERR(mp->m_sync_task); + error = xfs_syncd_init(mp); + if (error) goto fail_vnrele; - } xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index fe2ef4e6a0f..56dc48a76fa 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -101,9 +101,6 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); -extern void xfs_flush_inode(struct xfs_inode *); -extern void xfs_flush_device(struct xfs_inode *); - extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern const struct export_operations xfs_export_operations; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index c765eb2a8dc..a51534c71b3 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -44,6 +44,9 @@ #include "xfs_inode_item.h" #include "xfs_rw.h" +#include +#include + /* * xfs_sync flushes any pending I/O to file system vfsp. * @@ -603,3 +606,163 @@ xfs_syncsub( return XFS_ERROR(last_error); } + +/* + * Enqueue a work item to be picked up by the vfs xfssyncd thread. + * Doing this has two advantages: + * - It saves on stack space, which is tight in certain situations + * - It can be used (with care) as a mechanism to avoid deadlocks. + * Flushing while allocating in a full filesystem requires both. + */ +STATIC void +xfs_syncd_queue_work( + struct xfs_mount *mp, + void *data, + void (*syncer)(struct xfs_mount *, void *)) +{ + struct bhv_vfs_sync_work *work; + + work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); + INIT_LIST_HEAD(&work->w_list); + work->w_syncer = syncer; + work->w_data = data; + work->w_mount = mp; + spin_lock(&mp->m_sync_lock); + list_add_tail(&work->w_list, &mp->m_sync_list); + spin_unlock(&mp->m_sync_lock); + wake_up_process(mp->m_sync_task); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room... + */ +STATIC void +xfs_flush_inode_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + filemap_flush(inode->i_mapping); + iput(inode); +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); + delay(msecs_to_jiffies(500)); +} + +/* + * This is the "bigger hammer" version of xfs_flush_inode_work... + * (IOW, "If at first you don't succeed, use a Bigger Hammer"). + */ +STATIC void +xfs_flush_device_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + sync_blockdev(mp->m_super->s_bdev); + iput(inode); +} + +void +xfs_flush_device( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); + delay(msecs_to_jiffies(500)); + xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); +} + +STATIC void +xfs_sync_worker( + struct xfs_mount *mp, + void *unused) +{ + int error; + + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) + error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); + mp->m_sync_seq++; + wake_up(&mp->m_wait_single_sync_task); +} + +STATIC int +xfssyncd( + void *arg) +{ + struct xfs_mount *mp = arg; + long timeleft; + bhv_vfs_sync_work_t *work, *n; + LIST_HEAD (tmp); + + set_freezable(); + timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); + for (;;) { + timeleft = schedule_timeout_interruptible(timeleft); + /* swsusp */ + try_to_freeze(); + if (kthread_should_stop() && list_empty(&mp->m_sync_list)) + break; + + spin_lock(&mp->m_sync_lock); + /* + * We can get woken by laptop mode, to do a sync - + * that's the (only!) case where the list would be + * empty with time remaining. + */ + if (!timeleft || list_empty(&mp->m_sync_list)) { + if (!timeleft) + timeleft = xfs_syncd_centisecs * + msecs_to_jiffies(10); + INIT_LIST_HEAD(&mp->m_sync_work.w_list); + list_add_tail(&mp->m_sync_work.w_list, + &mp->m_sync_list); + } + list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) + list_move(&work->w_list, &tmp); + spin_unlock(&mp->m_sync_lock); + + list_for_each_entry_safe(work, n, &tmp, w_list) { + (*work->w_syncer)(mp, work->w_data); + list_del(&work->w_list); + if (work == &mp->m_sync_work) + continue; + kmem_free(work); + } + } + + return 0; +} + +int +xfs_syncd_init( + struct xfs_mount *mp) +{ + mp->m_sync_work.w_syncer = xfs_sync_worker; + mp->m_sync_work.w_mount = mp; + mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); + if (IS_ERR(mp->m_sync_task)) + return -PTR_ERR(mp->m_sync_task); + return 0; +} + +void +xfs_syncd_stop( + struct xfs_mount *mp) +{ + kthread_stop(mp->m_sync_task); +} + diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index f4c3b1ea64c..3746d153ec8 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -1,7 +1,63 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #ifndef XFS_SYNC_H #define XFS_SYNC_H 1 +struct xfs_mount; + +typedef struct bhv_vfs_sync_work { + struct list_head w_list; + struct xfs_mount *w_mount; + void *w_data; /* syncer routine argument */ + void (*w_syncer)(struct xfs_mount *, void *); +} bhv_vfs_sync_work_t; + +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_CLOSE 0x0002 /* close file system down */ +#define SYNC_DELWRI 0x0004 /* look at delayed writes */ +#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ +#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ +#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ +#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ +#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ + +/* + * When remounting a filesystem read-only or freezing the filesystem, + * we have two phases to execute. This first phase is syncing the data + * before we quiesce the fielsystem, and the second is flushing all the + * inodes out after we've waited for all the transactions created by + * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE + * to ensure that the inodes are written to their location on disk + * rather than just existing in transactions in the log. This means + * after a quiesce there is no log replay required to write the inodes + * to disk (this is the main difference between a sync and a quiesce). + */ +#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) +#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) + +int xfs_syncd_init(struct xfs_mount *mp); +void xfs_syncd_stop(struct xfs_mount *mp); + int xfs_sync(struct xfs_mount *mp, int flags); int xfs_syncsub(struct xfs_mount *mp, int flags, int *bypassed); +void xfs_flush_inode(struct xfs_inode *ip); +void xfs_flush_device(struct xfs_inode *ip); + #endif diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7e60c7776b1..0ab60bc2e76 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -33,37 +33,6 @@ struct xfs_mount_args; typedef struct kstatfs bhv_statvfs_t; -typedef struct bhv_vfs_sync_work { - struct list_head w_list; - struct xfs_mount *w_mount; - void *w_data; /* syncer routine argument */ - void (*w_syncer)(struct xfs_mount *, void *); -} bhv_vfs_sync_work_t; - -#define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_CLOSE 0x0002 /* close file system down */ -#define SYNC_DELWRI 0x0004 /* look at delayed writes */ -#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ -#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ -#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ - -/* - * When remounting a filesystem read-only or freezing the filesystem, - * we have two phases to execute. This first phase is syncing the data - * before we quiesce the fielsystem, and the second is flushing all the - * inodes out after we've waited for all the transactions created by - * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE - * to ensure that the inodes are written to their location on disk - * rather than just existing in transactions in the log. This means - * after a quiesce there is no log replay required to write the inodes - * to disk (this is the main difference between a sync and a quiesce). - */ -#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) -#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) - #define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ #define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ #define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 56c3b122e53..c6846810936 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -18,6 +18,7 @@ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ +#include "xfs_sync.h" typedef struct xfs_trans_reservations { uint tr_write; /* extent alloc trans */ -- cgit v1.2.3 From 75c68f411b1242c8fdaf731078fdd4e77b14981d Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:28 +1100 Subject: [XFS] Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() xfs_iflush_all() walks the m_inodes list to find inodes that need reclaiming. We already have such a list - the m_del_inodes list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all() and clean up xfs_finish_reclaim_all() to handle the different flush modes now needed. Originally based on a patch from Christoph Hellwig. Version 3 o rediff against new linux-2.6/xfs_sync.c code Version 2 o revert xfs_syncsub() inode reclaim behaviour back to original code o xfs_quiesce_fs() should use XFS_IFLUSH_DELWRI_ELSE_ASYNC, not XFS_IFLUSH_ASYNC, to prevent change of behaviour. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32284a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 2 +- fs/xfs/xfs_inode.c | 35 ----------------------------------- fs/xfs/xfs_inode.h | 3 +-- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_vfsops.c | 2 +- fs/xfs/xfs_vnodeops.c | 42 ++++++++++++++++++------------------------ 6 files changed, 22 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a51534c71b3..cd82ba523dc 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -504,7 +504,7 @@ xfs_syncsub( if (flags & (SYNC_ATTR|SYNC_DELWRI)) { if (flags & SYNC_BDFLUSH) - xfs_finish_reclaim_all(mp, 1); + xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); else error = xfs_sync_inodes(mp, flags, bypassed); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2b1294b8ad7..0c65ba2faa4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3499,41 +3499,6 @@ corrupt_out: } -/* - * Flush all inactive inodes in mp. - */ -void -xfs_iflush_all( - xfs_mount_t *mp) -{ - xfs_inode_t *ip; - - again: - XFS_MOUNT_ILOCK(mp); - ip = mp->m_inodes; - if (ip == NULL) - goto out; - - do { - /* Make sure we skip markers inserted by sync */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - - if (!VFS_I(ip)) { - XFS_MOUNT_IUNLOCK(mp); - xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); - goto again; - } - - ASSERT(vn_count(VFS_I(ip)) == 0); - - ip = ip->i_mnext; - } while (ip != mp->m_inodes); - out: - XFS_MOUNT_IUNLOCK(mp); -} #ifdef XFS_ILOCK_TRACE ktrace_t *xfs_ilock_trace_buf; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a8f1e6833aa..104623b7ec6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -503,7 +503,7 @@ uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); void xfs_ireclaim(xfs_inode_t *); int xfs_finish_reclaim(xfs_inode_t *, int, int); -int xfs_finish_reclaim_all(struct xfs_mount *, int); +int xfs_finish_reclaim_all(struct xfs_mount *, int, int); /* * xfs_inode.c prototypes. @@ -530,7 +530,6 @@ void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); -void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, uint); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 15f5dd22fbb..5ec6032d230 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1241,7 +1241,7 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_iflush_all(mp); + xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 01e274b902c..0c5ee5ec7ee 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -66,7 +66,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0); + xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* This loop must run at least twice. * The first instance of the loop will flush diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8b6812f66a1..a6714579a41 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2918,36 +2918,30 @@ xfs_finish_reclaim( } int -xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) +xfs_finish_reclaim_all( + xfs_mount_t *mp, + int noblock, + int mode) { - int purged; xfs_inode_t *ip, *n; - int done = 0; - while (!done) { - purged = 0; - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { - if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) - continue; - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } +restart: + XFS_MOUNT_ILOCK(mp); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + if (noblock) { + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + continue; + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; } - XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, - XFS_IFLUSH_DELWRI_ELSE_ASYNC)) - delay(1); - purged = 1; - break; } - - done = !purged; + XFS_MOUNT_IUNLOCK(mp); + if (xfs_finish_reclaim(ip, noblock, mode)) + delay(1); + goto restart; } - XFS_MOUNT_IUNLOCK(mp); return 0; } -- cgit v1.2.3 From 2f8a3ce1c20f20e6494cdb77fed76bc474ca3ca5 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:07:20 +1100 Subject: [XFS] don't block in xfs_qm_dqflush() during async writeback. Normally dquots are written back via delayed write mechanisms. They are flushed to their backing buffer by xfssyncd, which is then pushed out by either AIL or xfsbufd flushing. The flush from the xfssyncd is supposed to be non-blocking, but xfs_qm_dqflush() always waits for pinned duots, which means that it will block for the length of time it takes to do a synchronous log force. This causes unnecessary extra log I/O to be issued whenever we try to flush a busy dquot. Avoid the log forces and blocking xfssyncd by making xfs_qm_dqflush() pay attention to what type of sync it is doing when it sees a pinned dquot and not waiting when doing non-blocking flushes. SGI-PV: 988147 SGI-Modid: xfs-linux-melb:xfs-kern:32287a Signed-off-by: David Chinner Signed-off-by: Peter Leckie Signed-off-by: Lachlan McIlroy --- fs/xfs/quota/xfs_dquot.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index d3f4fbbe248..1e6bf392564 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1221,16 +1221,14 @@ xfs_qm_dqflush( xfs_dqtrace_entry(dqp, "DQFLUSH"); /* - * If not dirty, nada. + * If not dirty, or it's pinned and we are not supposed to + * block, nada. */ - if (!XFS_DQ_IS_DIRTY(dqp)) { + if (!XFS_DQ_IS_DIRTY(dqp) || + (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { xfs_dqfunlock(dqp); - return (0); + return 0; } - - /* - * Cant flush a pinned dquot. Wait for it. - */ xfs_qm_dqunpin_wait(dqp); /* -- cgit v1.2.3 From 683a897080a053733778b36398186cb1b22c377f Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:07:29 +1100 Subject: [XFS] Use the inode tree for finding dirty inodes Update xfs_sync_inodes to walk the inode radix tree cache to find dirty inodes. This removes a huge bunch of nasty, messy code for traversing the mount inode list safely and removes another user of the mount inode list. Version 3 o rediff against new linux-2.6/xfs_sync.c code Version 2 o add comment explaining use of gang lookups for a single inode o use IRELE, not VN_RELE o move check for ag initialisation to caller. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32290a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 361 +++++++++++++------------------------------- 1 file changed, 101 insertions(+), 260 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cd82ba523dc..53d85ecb1d5 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -121,356 +121,197 @@ xfs_sync( } /* - * xfs sync routine for internal use - * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. - * + * Sync all the inodes in the given AG according to the + * direction given by the flags. */ -int -xfs_sync_inodes( +STATIC int +xfs_sync_inodes_ag( xfs_mount_t *mp, + int ag, int flags, - int *bypassed) + int *bypassed) { xfs_inode_t *ip = NULL; struct inode *vp = NULL; - int error; - int last_error; - uint64_t fflag; - uint lock_flags; - uint base_lock_flags; - boolean_t mount_locked; - boolean_t vnode_refed; - int preempt; - xfs_iptr_t *ipointer; -#ifdef DEBUG - boolean_t ipointer_in = B_FALSE; - -#define IPOINTER_SET ipointer_in = B_TRUE -#define IPOINTER_CLR ipointer_in = B_FALSE -#else -#define IPOINTER_SET -#define IPOINTER_CLR -#endif - - -/* Insert a marker record into the inode list after inode ip. The list - * must be locked when this is called. After the call the list will no - * longer be locked. - */ -#define IPOINTER_INSERT(ip, mp) { \ - ASSERT(ipointer_in == B_FALSE); \ - ipointer->ip_mnext = ip->i_mnext; \ - ipointer->ip_mprev = ip; \ - ip->i_mnext = (xfs_inode_t *)ipointer; \ - ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ - preempt = 0; \ - XFS_MOUNT_IUNLOCK(mp); \ - mount_locked = B_FALSE; \ - IPOINTER_SET; \ - } - -/* Remove the marker from the inode list. If the marker was the only item - * in the list then there are no remaining inodes and we should zero out - * the whole list. If we are the current head of the list then move the head - * past us. - */ -#define IPOINTER_REMOVE(ip, mp) { \ - ASSERT(ipointer_in == B_TRUE); \ - if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ - ip = ipointer->ip_mnext; \ - ip->i_mprev = ipointer->ip_mprev; \ - ipointer->ip_mprev->i_mnext = ip; \ - if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ - mp->m_inodes = ip; \ - } \ - } else { \ - ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ - mp->m_inodes = NULL; \ - ip = NULL; \ - } \ - IPOINTER_CLR; \ - } - -#define XFS_PREEMPT_MASK 0x7f - - ASSERT(!(flags & SYNC_BDFLUSH)); - - if (bypassed) - *bypassed = 0; - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - error = 0; - last_error = 0; - preempt = 0; - - /* Allocate a reference marker */ - ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); + xfs_perag_t *pag = &mp->m_perag[ag]; + boolean_t vnode_refed = B_FALSE; + int nr_found; + int first_index = 0; + int error = 0; + int last_error = 0; + int fflag = XFS_B_ASYNC; + int lock_flags = XFS_ILOCK_SHARED; - fflag = XFS_B_ASYNC; /* default is don't wait */ if (flags & SYNC_DELWRI) fflag = XFS_B_DELWRI; if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ - base_lock_flags = XFS_ILOCK_SHARED; if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { /* * We need the I/O lock if we're going to call any of * the flush/inval routines. */ - base_lock_flags |= XFS_IOLOCK_SHARED; + lock_flags |= XFS_IOLOCK_SHARED; } - XFS_MOUNT_ILOCK(mp); - - ip = mp->m_inodes; - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_CLR; - do { - ASSERT(ipointer_in == B_FALSE); - ASSERT(vnode_refed == B_FALSE); - - lock_flags = base_lock_flags; - /* - * There were no inodes in the list, just break out - * of the loop. + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. */ - if (ip == NULL) { - break; - } + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); - /* - * We found another sync thread marker - skip it - */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; } - vp = VFS_I(ip); + /* update the index for the next lookup */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); /* - * If the vnode is gone then this is being torn down, - * call reclaim if it is flushed, else let regular flush - * code deal with it later in the loop. + * skip inodes in reclaim. Let xfs_syncsub do that for + * us so we don't need to worry. */ - - if (vp == NULL) { - /* Skip ones already in reclaim */ - if (ip->i_flags & XFS_IRECLAIM) { - ip = ip->i_mnext; - continue; - } - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - ip = ip->i_mnext; - } else if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - IPOINTER_INSERT(ip, mp); - - xfs_finish_reclaim(ip, 1, - XFS_IFLUSH_DELWRI_ELSE_ASYNC); - - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - } else { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - ip = ip->i_mnext; - } + vp = VFS_I(ip); + if (!vp) { + read_unlock(&pag->pag_ici_lock); continue; } + /* bad inodes are dealt with elsewhere */ if (VN_BAD(vp)) { - ip = ip->i_mnext; + read_unlock(&pag->pag_ici_lock); continue; } + /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { - XFS_MOUNT_IUNLOCK(mp); - kmem_free(ipointer); + read_unlock(&pag->pag_ici_lock); return 0; } /* - * Try to lock without sleeping. We're out of order with - * the inode list lock here, so if we fail we need to drop - * the mount lock and try again. If we're called from - * bdflush() here, then don't bother. - * - * The inode lock here actually coordinates with the - * almost spurious inode lock in xfs_ireclaim() to prevent - * the vnode we handle here without a reference from - * being freed while we reference it. If we lock the inode - * while it's on the mount list here, then the spurious inode - * lock in xfs_ireclaim() after the inode is pulled from - * the mount list will sleep until we release it here. - * This keeps the vnode from being freed while we reference - * it. + * The inode lock here actually coordinates with the almost + * spurious inode lock in xfs_ireclaim() to prevent the vnode + * we handle here without a reference from being freed while we + * reference it. If we lock the inode while it's on the mount + * list here, then the spurious inode lock in xfs_ireclaim() + * after the inode is pulled from the mount list will sleep + * until we release it here. This keeps the vnode from being + * freed while we reference it. */ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if (vp == NULL) { - ip = ip->i_mnext; - continue; - } - vp = vn_grab(vp); - if (vp == NULL) { - ip = ip->i_mnext; + read_unlock(&pag->pag_ici_lock); + if (!vp) continue; - } - - IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); ASSERT(vp == VFS_I(ip)); ASSERT(ip->i_mount == mp); vnode_refed = B_TRUE; + } else { + /* safe to unlock here as we have a reference */ + read_unlock(&pag->pag_ici_lock); } - - /* From here on in the loop we may have a marker record - * in the inode list. - */ - /* * If we have to flush data or wait for I/O completion * we need to drop the ilock that we currently hold. * If we need to drop the lock, insert a marker if we * have not already done so. */ - if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || - ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } + if (flags & SYNC_CLOSE) { xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (flags & SYNC_CLOSE) { - /* Shutdown case. Flush and invalidate. */ - if (XFS_FORCED_SHUTDOWN(mp)) - xfs_tosspages(ip, 0, -1, - FI_REMAPF); - else - error = xfs_flushinval_pages(ip, - 0, -1, FI_REMAPF); - } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { - error = xfs_flush_pages(ip, 0, - -1, fflag, FI_NONE); - } - - /* - * When freezing, we need to wait ensure all I/O (including direct - * I/O) is complete to ensure no further data modification can take - * place after this point - */ + if (XFS_FORCED_SHUTDOWN(mp)) + xfs_tosspages(ip, 0, -1, FI_REMAPF); + else + error = xfs_flushinval_pages(ip, 0, -1, + FI_REMAPF); + /* wait for I/O on freeze */ if (flags & SYNC_IOWAIT) vn_iowait(ip); xfs_ilock(ip, XFS_ILOCK_SHARED); } - if ((flags & SYNC_ATTR) && - (ip->i_update_core || - (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { - if (mount_locked) - IPOINTER_INSERT(ip, mp); + if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); + error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); + if (flags & SYNC_IOWAIT) + vn_iowait(ip); + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { if (flags & SYNC_WAIT) { xfs_iflock(ip); - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - - /* - * If we can't acquire the flush lock, then the inode - * is already being flushed so don't bother waiting. - * - * If we can lock it then do a delwri flush so we can - * combine multiple inode flushes in each disk write. - */ + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + else + xfs_ifunlock(ip); } else if (xfs_iflock_nowait(ip)) { - error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + else + xfs_ifunlock(ip); } else if (bypassed) { (*bypassed)++; } } - if (lock_flags != 0) { + if (lock_flags) xfs_iunlock(ip, lock_flags); - } if (vnode_refed) { - /* - * If we had to take a reference on the vnode - * above, then wait until after we've unlocked - * the inode to release the reference. This is - * because we can be already holding the inode - * lock when IRELE() calls xfs_inactive(). - * - * Make sure to drop the mount lock before calling - * IRELE() so that we don't trip over ourselves if - * we have to go for the mount lock again in the - * inactive code. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - IRELE(ip); - vnode_refed = B_FALSE; } - if (error) { + if (error) last_error = error; - } - /* * bail out if the filesystem is corrupted. */ - if (error == EFSCORRUPTED) { - if (!mount_locked) { - XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - } - XFS_MOUNT_IUNLOCK(mp); - ASSERT(ipointer_in == B_FALSE); - kmem_free(ipointer); + if (error == EFSCORRUPTED) return XFS_ERROR(error); - } - - /* Let other threads have a chance at the mount lock - * if we have looped many times without dropping the - * lock. - */ - if ((++preempt & XFS_PREEMPT_MASK) == 0) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - } - - if (mount_locked == B_FALSE) { - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - continue; - } - ASSERT(ipointer_in == B_FALSE); - ip = ip->i_mnext; + } while (nr_found); - } while (ip != mp->m_inodes); + return last_error; +} - XFS_MOUNT_IUNLOCK(mp); +int +xfs_sync_inodes( + xfs_mount_t *mp, + int flags, + int *bypassed) +{ + int error; + int last_error; + int i; - ASSERT(ipointer_in == B_FALSE); + if (bypassed) + *bypassed = 0; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; - kmem_free(ipointer); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + error = xfs_sync_inodes_ag(mp, i, flags, bypassed); + if (error) + last_error = error; + if (error == EFSCORRUPTED) + break; + } return XFS_ERROR(last_error); } -- cgit v1.2.3 From 5b4d89ae0f5ae45c7fa1dfc616fd2bb8634bb7b7 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:08:03 +1100 Subject: [XFS] Traverse inode trees when releasing dquots Make releasing all inode dquots traverse the per-ag inode radix trees rather than the mount inode list. This removes another user of the mount inode list. Version 3 o fix comment relating to avoiding trying to release the quota inodes and those in reclaim. Version 2 o add comment explaining use of gang lookups for a single inode o use IRELE, not VN_RELE o move check for ag initialisation to caller. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32291a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/quota/xfs_qm_syscalls.c | 127 +++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 68 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 1a3b803dfa5..26152b9ccc6 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1022,101 +1022,92 @@ xfs_qm_export_flags( /* - * Go thru all the inodes in the file system, releasing their dquots. - * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. This also gets called from - * xfs_rootumount. + * Release all the dquots on the inodes in an AG. */ -void -xfs_qm_dqrele_all_inodes( - struct xfs_mount *mp, - uint flags) +STATIC void +xfs_qm_dqrele_inodes_ag( + xfs_mount_t *mp, + int ag, + uint flags) { - xfs_inode_t *ip, *topino; - uint ireclaims; - struct inode *vp; - boolean_t vnode_refd; + xfs_inode_t *ip = NULL; + struct inode *vp = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int first_index = 0; + int nr_found; - ASSERT(mp->m_quotainfo); - - XFS_MOUNT_ILOCK(mp); -again: - ip = mp->m_inodes; - if (ip == NULL) { - XFS_MOUNT_IUNLOCK(mp); - return; - } do { - /* Skip markers inserted by xfs_sync */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - /* Root inode, rbmip and rsumip have associated blocks */ - if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); - ip = ip->i_mnext; - continue; + boolean_t vnode_refd = B_FALSE; + + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; } + + /* update the index for the next lookup */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + + /* skip quota inodes and those in reclaim */ vp = VFS_I(ip); - if (!vp) { + if (!vp || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); - ip = ip->i_mnext; + read_unlock(&pag->pag_ici_lock); continue; } - vnode_refd = B_FALSE; if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - ireclaims = mp->m_ireclaims; - topino = mp->m_inodes; vp = vn_grab(vp); + read_unlock(&pag->pag_ici_lock); if (!vp) - goto again; - - XFS_MOUNT_IUNLOCK(mp); - /* XXX restart limit ? */ - xfs_ilock(ip, XFS_ILOCK_EXCL); + continue; vnode_refd = B_TRUE; + xfs_ilock(ip, XFS_ILOCK_EXCL); } else { - ireclaims = mp->m_ireclaims; - topino = mp->m_inodes; - XFS_MOUNT_IUNLOCK(mp); + read_unlock(&pag->pag_ici_lock); } - - /* - * We don't keep the mountlock across the dqrele() call, - * since it can take a while.. - */ if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && + ip->i_gdquot) { xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * Wait until we've dropped the ilock and mountlock to - * do the vn_rele. Or be condemned to an eternity in the - * inactive code in hell. - */ if (vnode_refd) IRELE(ip); - XFS_MOUNT_ILOCK(mp); - /* - * If an inode was inserted or removed, we gotta - * start over again. - */ - if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { - /* XXX use a sentinel */ - goto again; - } - ip = ip->i_mnext; - } while (ip != mp->m_inodes); + } while (nr_found); +} - XFS_MOUNT_IUNLOCK(mp); +/* + * Go thru all the inodes in the file system, releasing their dquots. + * Note that the mount structure gets modified to indicate that quotas are off + * AFTER this, in the case of quotaoff. This also gets called from + * xfs_rootumount. + */ +void +xfs_qm_dqrele_all_inodes( + struct xfs_mount *mp, + uint flags) +{ + int i; + + ASSERT(mp->m_quotainfo); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_qm_dqrele_inodes_ag(mp, i, flags); + } } /*------------------------------------------------------------------------*/ -- cgit v1.2.3 From 60197e8df364df326dcbb987519f367ad0ee1a11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:11:19 +1100 Subject: [XFS] Cleanup maxrecs calculation. Clean up the way the maximum and minimum records for the btree blocks are calculated. For the alloc and inobt btrees all the values are pre-calculated in xfs_mount_common, and we switch the current loop around the ugly generic macros that use cpp token pasting to generate type names to two small helpers in normal C code. For the bmbt and bmdr trees these helpers also exist, but can be called during runtime, too. Here we also kill various macros dealing with them and inline the logic into the get_minrecs / get_maxrecs / get_dmaxrecs methods in xfs_bmap_btree.c. Note that all these new helpers take an xfs_mount * argument which will be needed to determine the size of a btree block once we add support for extended btree blocks with CRCs and other RAS information. SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32292a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_alloc_btree.c | 16 ++++++++++ fs/xfs/xfs_alloc_btree.h | 7 +---- fs/xfs/xfs_bmap.c | 18 ++++++------ fs/xfs/xfs_bmap_btree.c | 74 ++++++++++++++++++++++++++++++++++++++++++----- fs/xfs/xfs_bmap_btree.h | 48 ++++++++---------------------- fs/xfs/xfs_btree.h | 13 --------- fs/xfs/xfs_dinode.h | 3 +- fs/xfs/xfs_ialloc_btree.c | 16 ++++++++++ fs/xfs/xfs_ialloc_btree.h | 9 +----- fs/xfs/xfs_inode.c | 26 +++++++++-------- fs/xfs/xfs_log_recover.c | 4 +-- fs/xfs/xfs_mount.c | 34 +++++++++------------- fs/xfs/xfs_mount.h | 12 ++++---- 13 files changed, 158 insertions(+), 122 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 9e63f8c180d..6ff27b75b93 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -480,3 +480,19 @@ xfs_allocbt_init_cursor( return cur; } + +/* + * Calculate number of records in an alloc btree block. + */ +int +xfs_allocbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(struct xfs_btree_sblock); + + if (leaf) + return blocklen / sizeof(xfs_alloc_rec_t); + return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +} diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 22f1d709af7..ff1f71d069c 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -55,12 +55,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp)) -/* - * Real block structures have a size equal to the disk block size. - */ -#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0]) -#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0]) - /* * Minimum and maximum blocksize and sectorsize. * The blocksize upper limit is pretty much arbitrary. @@ -98,5 +92,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, xfs_btnum_t); +extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index b7f99d7576d..09e4de4ed50 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3051,15 +3051,15 @@ xfs_bmap_btree_to_extents( __be64 *pp; /* ptr to block address */ xfs_bmbt_block_t *rblock;/* root btree block */ + mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); rblock = ifp->if_broot; ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); - mp = ip->i_mount; - pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); *logflagsp = 0; #ifdef DEBUG @@ -4221,7 +4221,7 @@ xfs_bmap_compute_maxlevels( maxleafents = MAXAEXTNUM; sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); } - maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); + maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; minnoderecs = mp->m_bmap_dmnr[1]; maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; @@ -4555,7 +4555,7 @@ xfs_bmap_read_extents( */ level = be16_to_cpu(block->bb_level); ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLDFSBNO); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); @@ -6205,13 +6205,13 @@ xfs_check_block( */ if (root) { - pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); } else { pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr); } for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { if (root) { - thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz); + thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); } else { thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j, dmxr); @@ -6266,7 +6266,7 @@ xfs_bmap_check_leaf_extents( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLDFSBNO); @@ -6426,7 +6426,7 @@ xfs_bmap_count_blocks( block = ifp->if_broot; level = be16_to_cpu(block->bb_level); ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLDFSBNO); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index c5eeb3241e2..853828c6b45 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -66,6 +66,7 @@ xfs_extent_state( */ void xfs_bmdr_to_bmbt( + struct xfs_mount *mp, xfs_bmdr_block_t *dblock, int dblocklen, xfs_bmbt_block_t *rblock, @@ -83,11 +84,11 @@ xfs_bmdr_to_bmbt( rblock->bb_numrecs = dblock->bb_numrecs; rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); - dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); + dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); - tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); + tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); @@ -428,6 +429,7 @@ xfs_bmbt_set_state( */ void xfs_bmbt_to_bmdr( + struct xfs_mount *mp, xfs_bmbt_block_t *rblock, int rblocklen, xfs_bmdr_block_t *dblock, @@ -445,10 +447,10 @@ xfs_bmbt_to_bmdr( ASSERT(be16_to_cpu(rblock->bb_level) > 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; - dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); + dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); - fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); + fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); @@ -626,15 +628,36 @@ xfs_bmbt_get_minrecs( struct xfs_btree_cur *cur, int level) { - return XFS_BMAP_BLOCK_IMINRECS(level, cur); + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, + cur->bc_private.b.whichfork); + + return xfs_bmbt_maxrecs(cur->bc_mp, + ifp->if_broot_bytes, level == 0) / 2; + } + + return cur->bc_mp->m_bmap_dmnr[level != 0]; } -STATIC int +int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, int level) { - return XFS_BMAP_BLOCK_IMAXRECS(level, cur); + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, + cur->bc_private.b.whichfork); + + return xfs_bmbt_maxrecs(cur->bc_mp, + ifp->if_broot_bytes, level == 0); + } + + return cur->bc_mp->m_bmap_dmxr[level != 0]; + } /* @@ -651,7 +674,10 @@ xfs_bmbt_get_dmaxrecs( struct xfs_btree_cur *cur, int level) { - return XFS_BMAP_BLOCK_DMAXRECS(level, cur); + if (level != cur->bc_nlevels - 1) + return cur->bc_mp->m_bmap_dmxr[level != 0]; + return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize, + level == 0); } STATIC void @@ -871,3 +897,35 @@ xfs_bmbt_init_cursor( return cur; } + +/* + * Calculate number of records in a bmap btree block. + */ +int +xfs_bmbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(struct xfs_btree_lblock); + + if (leaf) + return blocklen / sizeof(xfs_bmbt_rec_t); + return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +} + +/* + * Calculate number of records in a bmap btree inode root. + */ +int +xfs_bmdr_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(xfs_bmdr_block_t); + + if (leaf) + return blocklen / sizeof(xfs_bmdr_rec_t); + return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); +} diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 5669242b52d..835be2a84ca 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -151,33 +151,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) -#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) -#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ - ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ - (cur)->bc_private.b.whichfork)->if_broot_bytes) - -#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ - xfs_bmdr, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) -#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ - xfs_bmbt, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) - -#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\ - xfs_bmdr, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) -#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ - xfs_bmbt, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) - #define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) #define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) @@ -192,8 +165,8 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ be16_to_cpu((bb)->bb_level), cur))) #define XFS_BMAP_PTR_IADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \ - be16_to_cpu((bb)->bb_level), cur))) + (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, xfs_bmbt_get_maxrecs(cur, \ + be16_to_cpu((bb)->bb_level)))) /* * These are to be used when we know the size of the block and @@ -203,11 +176,8 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) #define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \ (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i)) -#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))) - -#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs) -#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0) +#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb,i,sz) \ + (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,xfs_bmbt_maxrecs(mp, sz, 0))) #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ (int)(sizeof(xfs_bmbt_block_t) + \ @@ -234,7 +204,8 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; /* * Prototypes for xfs_bmap.c to call. */ -extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); +extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, + xfs_bmbt_block_t *, int); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); @@ -257,7 +228,12 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); -extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); +extern void xfs_bmbt_to_bmdr(struct xfs_mount *, xfs_bmbt_block_t *, int, + xfs_bmdr_block_t *, int); + +extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); +extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); +extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7425b2b4a25..795a124cee6 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -148,19 +148,6 @@ do { \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) -/* - * Maximum and minimum records in a btree block. - * Given block size, type prefix, and leaf flag (0 or 1). - * The divisor below is equivalent to lf ? (e1) : (e2) but that produces - * compiler warnings. - */ -#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \ - ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \ - (((lf) * (uint)sizeof(t ## _rec_t)) + \ - ((1 - (lf)) * \ - ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t)))))) -#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \ - (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2) /* * Record, key, and pointer address calculation macros. diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index c9065eaf2a4..2a00fcc36d8 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -78,8 +78,7 @@ typedef struct xfs_dinode xfs_dinode_core_t di_core; /* * In adding anything between the core and the union, be - * sure to update the macros like XFS_LITINO below and - * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h. + * sure to update the macros like XFS_LITINO below. */ __be32 di_next_unlinked;/* agi unlinked list ptr */ union { diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index dcd4a956e73..46aabb3fcbf 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -365,3 +365,19 @@ xfs_inobt_init_cursor( return cur; } + +/* + * Calculate number of records in an inobt btree block. + */ +int +xfs_inobt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(struct xfs_btree_sblock); + + if (leaf) + return blocklen / sizeof(xfs_inobt_rec_t); + return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +} diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index ff7406b4bac..f0fc1e46e62 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -84,14 +84,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_SET_FREE(rp,i) ((rp)->ir_free |= XFS_INOBT_MASK(i)) #define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) -/* - * Real block structures have a size equal to the disk block size. - */ -#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0]) -#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0]) -#define XFS_INOBT_IS_LAST_REC(cur) \ - ((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs)) - /* * Maximum number of inode btree levels. */ @@ -118,5 +110,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); +extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0c65ba2faa4..73b604e15dc 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -622,7 +622,7 @@ xfs_iformat_btree( ifp = XFS_IFORK_PTR(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); size = XFS_BMAP_BROOT_SPACE(dfp); - nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); + nrecs = be16_to_cpu(dfp->bb_numrecs); /* * blow out if -- fork has less extents than can fit in @@ -650,8 +650,9 @@ xfs_iformat_btree( * Copy and convert from the on-disk structure * to the in-memory structure. */ - xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), - ifp->if_broot, size); + xfs_bmdr_to_bmbt(ip->i_mount, dfp, + XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), + ifp->if_broot, size); ifp->if_flags &= ~XFS_IFEXTENTS; ifp->if_flags |= XFS_IFBROOT; @@ -2348,6 +2349,7 @@ xfs_iroot_realloc( int rec_diff, int whichfork) { + struct xfs_mount *mp = ip->i_mount; int cur_max; xfs_ifork_t *ifp; xfs_bmbt_block_t *new_broot; @@ -2383,7 +2385,7 @@ xfs_iroot_realloc( * location. The records don't change location because * they are kept butted up against the btree block header. */ - cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); ifp->if_broot = (xfs_bmbt_block_t *) @@ -2391,10 +2393,10 @@ xfs_iroot_realloc( new_size, (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ KM_SLEEP); - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, - ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, - (int)new_size); + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + (int)new_size); ifp->if_broot_bytes = (int)new_size; ASSERT(ifp->if_broot_bytes <= XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); @@ -2408,7 +2410,7 @@ xfs_iroot_realloc( * records, just get rid of the root and clear the status bit. */ ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); - cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; ASSERT(new_max >= 0); if (new_max > 0) @@ -2442,9 +2444,9 @@ xfs_iroot_realloc( /* * Then copy the pointers. */ - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, (int)new_size); memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); } @@ -2920,7 +2922,7 @@ xfs_iflush_fork( ASSERT(ifp->if_broot_bytes <= (XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ)); - xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, + xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, (xfs_bmdr_block_t *)cp, XFS_DFORK_SIZE(dip, mp, whichfork)); } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82d46ce69d5..23c3a782a9e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2452,7 +2452,7 @@ xlog_recover_do_inode_trans( break; case XFS_ILOG_DBROOT: - xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, + xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, &(dip->di_u.di_bmbt), XFS_DFORK_DSIZE(dip, mp)); break; @@ -2490,7 +2490,7 @@ xlog_recover_do_inode_trans( case XFS_ILOG_ABROOT: dest = XFS_DFORK_APTR(dip); - xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, + xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, (xfs_bmdr_block_t*)dest, XFS_DFORK_ASIZE(dip, mp)); break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5ec6032d230..40338ff8fdd 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -567,8 +567,6 @@ xfs_readsb(xfs_mount_t *mp, int flags) STATIC void xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) { - int i; - mp->m_agfrotor = mp->m_agirotor = 0; spin_lock_init(&mp->m_agirotor_lock); mp->m_maxagi = mp->m_sb.sb_agcount; @@ -605,24 +603,20 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) } ASSERT(mp->m_attroffset < XFS_LITINO(mp)); - for (i = 0; i < 2; i++) { - mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, - xfs_alloc, i == 0); - mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, - xfs_alloc, i == 0); - } - for (i = 0; i < 2; i++) { - mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, - xfs_bmbt, i == 0); - mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, - xfs_bmbt, i == 0); - } - for (i = 0; i < 2; i++) { - mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, - xfs_inobt, i == 0); - mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, - xfs_inobt, i == 0); - } + mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); + mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; + mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; + + mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); + mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; + mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; + + mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); + mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; + mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c6846810936..f4644d71548 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -276,12 +276,12 @@ typedef struct xfs_mount { uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ - uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ - uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ - uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ - uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ - uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ - uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ + uint m_alloc_mxr[2]; /* max alloc btree records */ + uint m_alloc_mnr[2]; /* min alloc btree records */ + uint m_bmap_dmxr[2]; /* max bmap btree records */ + uint m_bmap_dmnr[2]; /* min bmap btree records */ + uint m_inobt_mxr[2]; /* max inobt btree records */ + uint m_inobt_mnr[2]; /* min inobt btree records */ uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ -- cgit v1.2.3 From 6c7699c047c50403149ad91331dd39de47dea070 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:11:29 +1100 Subject: [XFS] remove the mount inode list Now we've removed all users of the mount inode list, we can kill it. This reduces the size of the xfs_inode by 2 pointers. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32293a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_iget.c | 42 +----------------------------------------- fs/xfs/xfs_inode.h | 8 -------- fs/xfs/xfs_mount.c | 5 ----- fs/xfs/xfs_mount.h | 1 - 4 files changed, 1 insertion(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 4c92d190b3b..1256746b249 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -76,7 +76,6 @@ xfs_iget_core( { struct inode *old_inode; xfs_inode_t *ip; - xfs_inode_t *iq; int error; unsigned long first_index, mask; xfs_perag_t *pag; @@ -255,24 +254,6 @@ finish_inode: write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); - - /* - * Link ip to its mount and thread it on the mount's inode list. - */ - XFS_MOUNT_ILOCK(mp); - if ((iq = mp->m_inodes)) { - ASSERT(iq->i_mprev->i_mnext == iq); - ip->i_mprev = iq->i_mprev; - iq->i_mprev->i_mnext = ip; - iq->i_mprev = ip; - ip->i_mnext = iq; - } else { - ip->i_mnext = ip; - ip->i_mprev = ip; - } - mp->m_inodes = ip; - - XFS_MOUNT_IUNLOCK(mp); xfs_put_perag(mp, pag); return_ip: @@ -493,36 +474,15 @@ xfs_iextract( { xfs_mount_t *mp = ip->i_mount; xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); - xfs_inode_t *iq; write_lock(&pag->pag_ici_lock); radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); write_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); - /* - * Remove from mount's inode list. - */ - XFS_MOUNT_ILOCK(mp); - ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL)); - iq = ip->i_mnext; - iq->i_mprev = ip->i_mprev; - ip->i_mprev->i_mnext = iq; - - /* - * Fix up the head pointer if it points to the inode being deleted. - */ - if (mp->m_inodes == ip) { - if (ip == iq) { - mp->m_inodes = NULL; - } else { - mp->m_inodes = iq; - } - } - /* Deal with the deleted inodes list */ + XFS_MOUNT_ILOCK(mp); list_del_init(&ip->i_reclaim); - mp->m_ireclaims++; XFS_MOUNT_IUNLOCK(mp); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 104623b7ec6..55d50b888b6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -233,16 +233,8 @@ typedef struct dm_attrs_s { __uint16_t da_pad; /* DMIG extra padding */ } dm_attrs_t; -typedef struct { - struct xfs_inode *ip_mnext; /* next inode in mount list */ - struct xfs_inode *ip_mprev; /* ptr to prev inode */ - struct xfs_mount *ip_mount; /* fs mount struct ptr */ -} xfs_iptr_t; - typedef struct xfs_inode { /* Inode linking and identification information. */ - struct xfs_inode *i_mnext; /* next inode in mount list */ - struct xfs_inode *i_mprev; /* ptr to prev inode */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ struct inode *i_vnode; /* vnode backpointer */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40338ff8fdd..43e5917465a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1282,11 +1282,6 @@ xfs_unmountfs( xfs_unmountfs_wait(mp); /* wait for async bufs */ xfs_log_unmount(mp); /* Done! No more fs ops. */ - /* - * All inodes from this mount point should be freed. - */ - ASSERT(mp->m_inodes == NULL); - if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) uuid_table_remove(&mp->m_sb.sb_uuid); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f4644d71548..0ba05269112 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -248,7 +248,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - struct xfs_inode *m_inodes; /* active inode list */ struct list_head m_del_inodes; /* inodes to reclaim */ mutex_t m_ilock; /* inode list mutex */ uint m_ireclaims; /* count of calls to reclaim*/ -- cgit v1.2.3 From 136341b41ad4883bd668120f727a52c42331fe8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:11:40 +1100 Subject: [XFS] cleanup btree record / key / ptr addressing macros. Replace the generic record / key / ptr addressing macros that use cpp token pasting with simpler macros that do the job for just one given btree type. The new macros lose the cur argument and thus can be used outside the core btree code, but also gain an xfs_mount * argument to allow for checking the CRC flag in the near future. Note that many of these macros aren't actually used in the kernel code, but only in userspace (mostly in xfs_repair). SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32295a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_alloc_btree.c | 2 +- fs/xfs/xfs_alloc_btree.h | 25 +++++++++++++------ fs/xfs/xfs_bmap.c | 54 ++++++++++++++++++----------------------- fs/xfs/xfs_bmap_btree.c | 13 +++++----- fs/xfs/xfs_bmap_btree.h | 62 ++++++++++++++++++++++++++++++----------------- fs/xfs/xfs_btree.h | 15 ------------ fs/xfs/xfs_fsops.c | 4 +-- fs/xfs/xfs_ialloc_btree.h | 29 +++++++++++++++------- fs/xfs/xfs_inode.c | 6 ++--- 9 files changed, 113 insertions(+), 97 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 6ff27b75b93..72c083f62a9 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -179,7 +179,7 @@ xfs_allocbt_update_lastrec( if (numrecs) { xfs_alloc_rec_t *rrp; - rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); + rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); len = rrp->ar_blockcount; } else { len = 0; diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index ff1f71d069c..579f9c7e087 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -78,16 +78,27 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; /* * Record, key, and pointer address macros for btree blocks. + * + * (note that some of these may appear unused, but they are used in userspace) */ -#define XFS_ALLOC_REC_ADDR(bb,i,cur) \ - XFS_BTREE_REC_ADDR(xfs_alloc, bb, i) - -#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \ - XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i) +#define XFS_ALLOC_REC_ADDR(mp, block, index) \ + ((xfs_alloc_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (((index) - 1) * sizeof(xfs_alloc_rec_t)))) -#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ - XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) +#define XFS_ALLOC_KEY_ADDR(mp, block, index) \ + ((xfs_alloc_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + ((index) - 1) * sizeof(xfs_alloc_key_t))) +#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \ + ((xfs_alloc_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (maxrecs) * sizeof(xfs_alloc_key_t) + \ + ((index) - 1) * sizeof(xfs_alloc_ptr_t))) extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 09e4de4ed50..3dab937d4b8 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -393,7 +393,7 @@ xfs_bmap_count_leaves( STATIC void xfs_bmap_disk_count_leaves( - xfs_extnum_t idx, + struct xfs_mount *mp, xfs_bmbt_block_t *block, int numrecs, int *count); @@ -3539,7 +3539,7 @@ xfs_bmap_extents_to_btree( ablock->bb_level = 0; ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO); ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); - arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); for (cnt = i = 0; i < nextents; i++) { ep = xfs_iext_get_ext(ifp, i); @@ -3554,11 +3554,13 @@ xfs_bmap_extents_to_btree( /* * Fill in the root key and pointer. */ - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); + kp = XFS_BMBT_KEY_ADDR(mp, block, 1); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + be16_to_cpu(block->bb_level))); *pp = cpu_to_be64(args.fsbno); + /* * Do all this logging at the end so that * the root is at the right level. @@ -4574,7 +4576,7 @@ xfs_bmap_read_extents( error0); if (level == 0) break; - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); xfs_trans_brelse(tp, bp); @@ -4617,7 +4619,7 @@ xfs_bmap_read_extents( /* * Copy records into the extent records. */ - frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); + frp = XFS_BMBT_REC_ADDR(mp, block, 1); start = i; for (j = 0; j < num_recs; j++, i++, frp++) { xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); @@ -6187,12 +6189,7 @@ xfs_check_block( prevp = NULL; for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) { dmxr = mp->m_bmap_dmxr[0]; - - if (root) { - keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz); - } else { - keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i); - } + keyp = XFS_BMBT_KEY_ADDR(mp, block, i); if (prevp) { ASSERT(be64_to_cpu(prevp->br_startoff) < @@ -6203,19 +6200,16 @@ xfs_check_block( /* * Compare the block numbers to see if there are dups. */ - - if (root) { + if (root) pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); - } else { - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr); - } + else + pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { - if (root) { + if (root) thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); - } else { - thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j, - dmxr); - } + else + thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); if (*thispa == *pp) { cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", __func__, j, i, @@ -6301,7 +6295,7 @@ xfs_bmap_check_leaf_extents( */ xfs_check_block(block, mp, 0, 0); - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); if (bp_release) { @@ -6337,14 +6331,14 @@ xfs_bmap_check_leaf_extents( * conform with the first entry in this one. */ - ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); + ep = XFS_BMBT_REC_ADDR(mp, block, 1); if (i) { ASSERT(xfs_bmbt_disk_get_startoff(&last) + xfs_bmbt_disk_get_blockcount(&last) <= xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { - nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); + nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); ASSERT(xfs_bmbt_disk_get_startoff(ep) + xfs_bmbt_disk_get_blockcount(ep) <= xfs_bmbt_disk_get_startoff(nextp)); @@ -6482,7 +6476,7 @@ xfs_bmap_count_tree( } /* Dive to the next level */ - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (unlikely((error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { @@ -6497,7 +6491,7 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); - xfs_bmap_disk_count_leaves(0, block, numrecs, count); + xfs_bmap_disk_count_leaves(mp, block, numrecs, count); xfs_trans_brelse(tp, bp); if (nextbno == NULLFSBLOCK) break; @@ -6536,7 +6530,7 @@ xfs_bmap_count_leaves( */ STATIC void xfs_bmap_disk_count_leaves( - xfs_extnum_t idx, + struct xfs_mount *mp, xfs_bmbt_block_t *block, int numrecs, int *count) @@ -6545,7 +6539,7 @@ xfs_bmap_disk_count_leaves( xfs_bmbt_rec_t *frp; for (b = 1; b <= numrecs; b++) { - frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); + frp = XFS_BMBT_REC_ADDR(mp, block, b); *count += xfs_bmbt_disk_get_blockcount(frp); } } diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 853828c6b45..11137c042c9 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -44,7 +44,6 @@ #include "xfs_error.h" #include "xfs_quota.h" - /* * Determine the extent state. */ @@ -85,9 +84,9 @@ xfs_bmdr_to_bmbt( rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); - fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); - tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); - fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); + fkp = XFS_BMDR_KEY_ADDR(dblock, 1); + tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); + fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); @@ -448,10 +447,10 @@ xfs_bmbt_to_bmdr( dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); - fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); - tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); + fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); + tkp = XFS_BMDR_KEY_ADDR(dblock, 1); fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); - tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); + tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 835be2a84ca..7f001072db4 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -21,6 +21,7 @@ #define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ struct xfs_btree_cur; +struct xfs_btree_block; struct xfs_btree_lblock; struct xfs_mount; struct xfs_inode; @@ -151,33 +152,50 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) -#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ - (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ - (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ - be16_to_cpu((bb)->bb_level), cur))) -#define XFS_BMAP_PTR_IADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, xfs_bmbt_get_maxrecs(cur, \ - be16_to_cpu((bb)->bb_level)))) +#define XFS_BMBT_REC_ADDR(mp, block, index) \ + ((xfs_bmbt_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_lblock) + \ + ((index) - 1) * sizeof(xfs_bmbt_rec_t))) + +#define XFS_BMBT_KEY_ADDR(mp, block, index) \ + ((xfs_bmbt_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_lblock) + \ + ((index) - 1) * sizeof(xfs_bmbt_key_t))) + +#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ + ((xfs_bmbt_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_lblock) + \ + (maxrecs) * sizeof(xfs_bmbt_key_t) + \ + ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) + +#define XFS_BMDR_REC_ADDR(block, index) \ + ((xfs_bmdr_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_bmdr_block) + \ + ((index) - 1) * sizeof(xfs_bmdr_rec_t))) + +#define XFS_BMDR_KEY_ADDR(block, index) \ + ((xfs_bmdr_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_bmdr_block) + \ + ((index) - 1) * sizeof(xfs_bmdr_key_t))) + +#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \ + ((xfs_bmdr_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_bmdr_block) + \ + (maxrecs) * sizeof(xfs_bmdr_key_t) + \ + ((index) - 1) * sizeof(xfs_bmdr_ptr_t))) /* * These are to be used when we know the size of the block and * we don't have a cursor. */ -#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ - (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) -#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \ - (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i)) -#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb,i,sz) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,xfs_bmbt_maxrecs(mp, sz, 0))) +#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ + XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ (int)(sizeof(xfs_bmbt_block_t) + \ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 795a124cee6..d6120a74906 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -149,21 +149,6 @@ do { \ } \ } while (0) -/* - * Record, key, and pointer address calculation macros. - * Given block size, type prefix, block pointer, and index of requested entry - * (first entry numbered 1). - */ -#define XFS_BTREE_REC_ADDR(t,bb,i) \ - ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ - ((i) - 1) * sizeof(t ## _rec_t))) -#define XFS_BTREE_KEY_ADDR(t,bb,i) \ - ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ - ((i) - 1) * sizeof(t ## _key_t))) -#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \ - ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ - (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) - #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ struct xfs_btree_ops { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 84583cf73db..8ce72aba027 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -258,7 +258,7 @@ xfs_growfs_data_private( block->bb_numrecs = cpu_to_be16(1); block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); + arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( agsize - be32_to_cpu(arec->ar_startblock)); @@ -279,7 +279,7 @@ xfs_growfs_data_private( block->bb_numrecs = cpu_to_be16(1); block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); + arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( agsize - be32_to_cpu(arec->ar_startblock)); diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index f0fc1e46e62..fa12c85db34 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -97,16 +97,27 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; /* * Record, key, and pointer address macros for btree blocks. + * + * (note that some of these may appear unused, but they are used in userspace) */ -#define XFS_INOBT_REC_ADDR(bb,i,cur) \ - (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i)) - -#define XFS_INOBT_KEY_ADDR(bb,i,cur) \ - (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i)) - -#define XFS_INOBT_PTR_ADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \ - i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) +#define XFS_INOBT_REC_ADDR(mp, block, index) \ + ((xfs_inobt_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (((index) - 1) * sizeof(xfs_inobt_rec_t)))) + +#define XFS_INOBT_KEY_ADDR(mp, block, index) \ + ((xfs_inobt_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + ((index) - 1) * sizeof(xfs_inobt_key_t))) + +#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \ + ((xfs_inobt_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (maxrecs) * sizeof(xfs_inobt_key_t) + \ + ((index) - 1) * sizeof(xfs_inobt_ptr_t))) extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 73b604e15dc..7b4f13c710d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2435,10 +2435,8 @@ xfs_iroot_realloc( /* * First copy the records. */ - op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, - ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1, - (int)new_size); + op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); + np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); /* -- cgit v1.2.3 From 7cc95a821df8f09a5d37a923cf8c3a7c3ee00c29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:14:34 +1100 Subject: [XFS] Always use struct xfs_btree_block instead of short / longform structures. Always use the generic xfs_btree_block type instead of the short / long structures. Add XFS_BTREE_SBLOCK_LEN / XFS_BTREE_LBLOCK_LEN defines for the length of a short / long form block. The rationale for this is that we will grow more btree block header variants to support CRCs and other RAS information, and always accessing them through the same datatype with unions for the short / long form pointers makes implementing this much easier. SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32300a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_alloc.c | 23 +++++++------- fs/xfs/xfs_alloc_btree.c | 2 +- fs/xfs/xfs_alloc_btree.h | 18 ++++++----- fs/xfs/xfs_bmap.c | 71 +++++++++++++++++++++-------------------- fs/xfs/xfs_bmap_btree.c | 14 ++++---- fs/xfs/xfs_bmap_btree.h | 23 +++++++------- fs/xfs/xfs_btree.c | 81 +++++++++++++++++++++++------------------------ fs/xfs/xfs_btree.h | 53 +++++++++---------------------- fs/xfs/xfs_dinode.h | 2 +- fs/xfs/xfs_fsops.c | 20 ++++++------ fs/xfs/xfs_ialloc_btree.c | 2 +- fs/xfs/xfs_ialloc_btree.h | 19 +++++------ fs/xfs/xfs_inode.c | 13 +++----- fs/xfs/xfs_inode.h | 3 +- fs/xfs/xfs_log_recover.c | 8 ++--- 15 files changed, 165 insertions(+), 187 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 0a2a87208b1..c47ce907572 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -380,21 +380,20 @@ xfs_alloc_fixup_trees( return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } + #ifdef DEBUG - { - xfs_alloc_block_t *bnoblock; - xfs_alloc_block_t *cntblock; - - if (bno_cur->bc_nlevels == 1 && - cnt_cur->bc_nlevels == 1) { - bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); - XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(bnoblock->bb_numrecs) == - be16_to_cpu(cntblock->bb_numrecs)); - } + if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { + struct xfs_btree_block *bnoblock; + struct xfs_btree_block *cntblock; + + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + + XFS_WANT_CORRUPTED_RETURN( + bnoblock->bb_numrecs == cntblock->bb_numrecs); } #endif + /* * Deal with all four cases: the allocated record is contained * within the freespace record, so we can have new freespace diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 72c083f62a9..733cb75a8c5 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -490,7 +490,7 @@ xfs_allocbt_maxrecs( int blocklen, int leaf) { - blocklen -= sizeof(struct xfs_btree_sblock); + blocklen -= XFS_ALLOC_BLOCK_LEN(mp); if (leaf) return blocklen / sizeof(xfs_alloc_rec_t); diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 579f9c7e087..a6caa0022c9 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -24,7 +24,6 @@ struct xfs_buf; struct xfs_btree_cur; -struct xfs_btree_sblock; struct xfs_mount; /* @@ -50,10 +49,6 @@ typedef struct xfs_alloc_rec_incore { /* btree pointer type */ typedef __be32 xfs_alloc_ptr_t; -/* btree block header type */ -typedef struct xfs_btree_sblock xfs_alloc_block_t; - -#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp)) /* * Minimum and maximum blocksize and sectorsize. @@ -76,6 +71,13 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) #define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) +/* + * Btree block header size depends on a superblock flag. + * + * (not quite yet, but soon) + */ +#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN + /* * Record, key, and pointer address macros for btree blocks. * @@ -84,19 +86,19 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_ALLOC_REC_ADDR(mp, block, index) \ ((xfs_alloc_rec_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_ALLOC_BLOCK_LEN(mp) + \ (((index) - 1) * sizeof(xfs_alloc_rec_t)))) #define XFS_ALLOC_KEY_ADDR(mp, block, index) \ ((xfs_alloc_key_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_ALLOC_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_alloc_key_t))) #define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_alloc_ptr_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_ALLOC_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_alloc_key_t) + \ ((index) - 1) * sizeof(xfs_alloc_ptr_t))) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3dab937d4b8..7796a0c140e 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -394,7 +394,7 @@ xfs_bmap_count_leaves( STATIC void xfs_bmap_disk_count_leaves( struct xfs_mount *mp, - xfs_bmbt_block_t *block, + struct xfs_btree_block *block, int numrecs, int *count); @@ -3042,14 +3042,14 @@ xfs_bmap_btree_to_extents( int whichfork) /* data or attr fork */ { /* REFERENCED */ - xfs_bmbt_block_t *cblock;/* child btree block */ + struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ xfs_buf_t *cbp; /* child block's buffer */ int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork data */ xfs_mount_t *mp; /* mount point structure */ __be64 *pp; /* ptr to block address */ - xfs_bmbt_block_t *rblock;/* root btree block */ + struct xfs_btree_block *rblock;/* root btree block */ mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -3069,8 +3069,8 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF))) return error; - cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); - if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp))) + cblock = XFS_BUF_TO_BLOCK(cbp); + if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; @@ -3450,11 +3450,11 @@ xfs_bmap_extents_to_btree( int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { - xfs_bmbt_block_t *ablock; /* allocated (child) bt block */ + struct xfs_btree_block *ablock; /* allocated (child) bt block */ xfs_buf_t *abp; /* buffer for ablock */ xfs_alloc_arg_t args; /* allocation arguments */ xfs_bmbt_rec_t *arp; /* child record pointer */ - xfs_bmbt_block_t *block; /* btree root block */ + struct xfs_btree_block *block; /* btree root block */ xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_bmbt_rec_host_t *ep; /* extent record pointer */ int error; /* error return value */ @@ -3474,6 +3474,7 @@ xfs_bmap_extents_to_btree( */ xfs_iroot_realloc(ip, 1, whichfork); ifp->if_flags |= XFS_IFBROOT; + /* * Fill in the root. */ @@ -3481,8 +3482,9 @@ xfs_bmap_extents_to_btree( block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); block->bb_level = cpu_to_be16(1); block->bb_numrecs = cpu_to_be16(1); - block->bb_leftsib = cpu_to_be64(NULLDFSBNO); - block->bb_rightsib = cpu_to_be64(NULLDFSBNO); + block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + /* * Need a cursor. Can't allocate until bb_level is filled in. */ @@ -3534,11 +3536,11 @@ xfs_bmap_extents_to_btree( /* * Fill in the child block. */ - ablock = XFS_BUF_TO_BMBT_BLOCK(abp); + ablock = XFS_BUF_TO_BLOCK(abp); ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); ablock->bb_level = 0; - ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO); - ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); + ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); for (cnt = i = 0; i < nextents; i++) { @@ -3550,7 +3552,8 @@ xfs_bmap_extents_to_btree( } } ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); - ablock->bb_numrecs = cpu_to_be16(cnt); + xfs_btree_set_numrecs(ablock, cnt); + /* * Fill in the root key and pointer. */ @@ -4533,7 +4536,7 @@ xfs_bmap_read_extents( xfs_inode_t *ip, /* incore inode */ int whichfork) /* data or attr fork */ { - xfs_bmbt_block_t *block; /* current btree block */ + struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ @@ -4570,7 +4573,7 @@ xfs_bmap_read_extents( if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( XFS_BMAP_SANITY_CHECK(mp, block, level), error0); @@ -4596,7 +4599,7 @@ xfs_bmap_read_extents( xfs_extnum_t start; - num_recs = be16_to_cpu(block->bb_numrecs); + num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { ASSERT(i + num_recs <= room); xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, @@ -4613,7 +4616,7 @@ xfs_bmap_read_extents( /* * Read-ahead the next leaf block, if any. */ - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); if (nextbno != NULLFSBLOCK) xfs_btree_reada_bufl(mp, nextbno, 1); /* @@ -4650,7 +4653,7 @@ xfs_bmap_read_extents( if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); } ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); @@ -6175,7 +6178,7 @@ xfs_bmap_get_bp( void xfs_check_block( - xfs_bmbt_block_t *block, + struct xfs_btree_block *block, xfs_mount_t *mp, int root, short sz) @@ -6187,7 +6190,7 @@ xfs_check_block( ASSERT(be16_to_cpu(block->bb_level) > 0); prevp = NULL; - for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) { + for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { dmxr = mp->m_bmap_dmxr[0]; keyp = XFS_BMBT_KEY_ADDR(mp, block, i); @@ -6232,7 +6235,7 @@ xfs_bmap_check_leaf_extents( xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { - xfs_bmbt_block_t *block; /* current btree block */ + struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ @@ -6282,7 +6285,7 @@ xfs_bmap_check_leaf_extents( if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, XFS_BMAP_BTREE_REF))) goto error_norelse; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( XFS_BMAP_SANITY_CHECK(mp, block, level), error0); @@ -6317,13 +6320,13 @@ xfs_bmap_check_leaf_extents( xfs_extnum_t num_recs; - num_recs = be16_to_cpu(block->bb_numrecs); + num_recs = xfs_btree_get_numrecs(block); /* * Read-ahead the next leaf block, if any. */ - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); /* * Check all the extents to make sure they are OK. @@ -6367,7 +6370,7 @@ xfs_bmap_check_leaf_extents( if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, XFS_BMAP_BTREE_REF))) goto error_norelse; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); } if (bp_release) { bp_release = 0; @@ -6397,7 +6400,7 @@ xfs_bmap_count_blocks( int whichfork, /* data or attr fork */ int *count) /* out: count of blocks */ { - xfs_bmbt_block_t *block; /* current btree block */ + struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ @@ -6454,24 +6457,24 @@ xfs_bmap_count_tree( __be64 *pp; xfs_fsblock_t bno = blockno; xfs_fsblock_t nextbno; - xfs_bmbt_block_t *block, *nextblock; + struct xfs_btree_block *block, *nextblock; int numrecs; if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; *count += 1; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); if (--level) { /* Not at node above leafs, count this level of nodes */ - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); while (nextbno != NULLFSBLOCK) { if ((error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, XFS_BMAP_BTREE_REF))) return error; *count += 1; - nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp); - nextbno = be64_to_cpu(nextblock->bb_rightsib); + nextblock = XFS_BUF_TO_BLOCK(nbp); + nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); xfs_trans_brelse(tp, nbp); } @@ -6489,7 +6492,7 @@ xfs_bmap_count_tree( } else { /* count all level 1 nodes and their leaves */ for (;;) { - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); xfs_bmap_disk_count_leaves(mp, block, numrecs, count); xfs_trans_brelse(tp, bp); @@ -6500,7 +6503,7 @@ xfs_bmap_count_tree( XFS_BMAP_BTREE_REF))) return error; *count += 1; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); } } return 0; @@ -6531,7 +6534,7 @@ xfs_bmap_count_leaves( STATIC void xfs_bmap_disk_count_leaves( struct xfs_mount *mp, - xfs_bmbt_block_t *block, + struct xfs_btree_block *block, int numrecs, int *count) { diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 11137c042c9..e46e02b8e27 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -68,7 +68,7 @@ xfs_bmdr_to_bmbt( struct xfs_mount *mp, xfs_bmdr_block_t *dblock, int dblocklen, - xfs_bmbt_block_t *rblock, + struct xfs_btree_block *rblock, int rblocklen) { int dmxr; @@ -81,8 +81,8 @@ xfs_bmdr_to_bmbt( rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; - rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); - rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); + rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); fkp = XFS_BMDR_KEY_ADDR(dblock, 1); tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); @@ -429,7 +429,7 @@ xfs_bmbt_set_state( void xfs_bmbt_to_bmdr( struct xfs_mount *mp, - xfs_bmbt_block_t *rblock, + struct xfs_btree_block *rblock, int rblocklen, xfs_bmdr_block_t *dblock, int dblocklen) @@ -441,8 +441,8 @@ xfs_bmbt_to_bmdr( __be64 *tpp; ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); - ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO); - ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); ASSERT(be16_to_cpu(rblock->bb_level) > 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; @@ -906,7 +906,7 @@ xfs_bmbt_maxrecs( int blocklen, int leaf) { - blocklen -= sizeof(struct xfs_btree_lblock); + blocklen -= XFS_BMBT_BLOCK_LEN(mp); if (leaf) return blocklen / sizeof(xfs_bmbt_rec_t); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 7f001072db4..735a42418c9 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -22,7 +22,6 @@ struct xfs_btree_cur; struct xfs_btree_block; -struct xfs_btree_lblock; struct xfs_mount; struct xfs_inode; struct xfs_trans; @@ -147,27 +146,29 @@ typedef struct xfs_bmbt_key { /* btree pointer type */ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; -/* btree block header type */ -typedef struct xfs_btree_lblock xfs_bmbt_block_t; - -#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) +/* + * Btree block header size depends on a superblock flag. + * + * (not quite yet, but soon) + */ +#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN #define XFS_BMBT_REC_ADDR(mp, block, index) \ ((xfs_bmbt_rec_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_lblock) + \ + XFS_BMBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_bmbt_rec_t))) #define XFS_BMBT_KEY_ADDR(mp, block, index) \ ((xfs_bmbt_key_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_lblock) + \ + XFS_BMBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_bmbt_key_t))) #define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_bmbt_ptr_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_lblock) + \ + XFS_BMBT_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_bmbt_key_t) + \ ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) @@ -198,7 +199,7 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ - (int)(sizeof(xfs_bmbt_block_t) + \ + (int)(XFS_BTREE_LBLOCK_LEN + \ ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) #define XFS_BMAP_BROOT_SPACE(bb) \ @@ -223,7 +224,7 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; * Prototypes for xfs_bmap.c to call. */ extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, - xfs_bmbt_block_t *, int); + struct xfs_btree_block *, int); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); @@ -246,7 +247,7 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); -extern void xfs_bmbt_to_bmdr(struct xfs_mount *, xfs_bmbt_block_t *, int, +extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 72a26bb7643..7ed59267420 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -53,10 +53,10 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { }; -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_lblock *block, /* btree long form block pointer */ + struct xfs_btree_block *block, /* btree long form block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer for block, if any */ { @@ -69,12 +69,14 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - block->bb_leftsib && - (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && - block->bb_rightsib && - (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); + block->bb_u.l.bb_leftsib && + (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_leftsib))) && + block->bb_u.l.bb_rightsib && + (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, XFS_RANDOM_BTREE_CHECK_LBLOCK))) { @@ -90,7 +92,7 @@ xfs_btree_check_lblock( STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sblock( struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_sblock *block, /* btree short form block pointer */ + struct xfs_btree_block *block, /* btree short form block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer containing block */ { @@ -107,12 +109,12 @@ xfs_btree_check_sblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || - be32_to_cpu(block->bb_leftsib) < agflen) && - block->bb_leftsib && - (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK || - be32_to_cpu(block->bb_rightsib) < agflen) && - block->bb_rightsib; + (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || + be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && + block->bb_u.s.bb_leftsib && + (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || + be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && + block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK, XFS_RANDOM_BTREE_CHECK_SBLOCK))) { @@ -135,13 +137,10 @@ xfs_btree_check_block( int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer containing block, if any */ { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - return xfs_btree_check_lblock(cur, - (struct xfs_btree_lblock *)block, level, bp); - } else { - return xfs_btree_check_sblock(cur, - (struct xfs_btree_sblock *)block, level, bp); - } + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return xfs_btree_check_lblock(cur, block, level, bp); + else + return xfs_btree_check_sblock(cur, block, level, bp); } /* @@ -326,8 +325,8 @@ xfs_btree_dup_cursor( static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) { return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? - sizeof(struct xfs_btree_lblock) : - sizeof(struct xfs_btree_sblock); + XFS_BTREE_LBLOCK_LEN : + XFS_BTREE_SBLOCK_LEN; } /* @@ -510,7 +509,7 @@ xfs_btree_islastblock( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to check */ { - xfs_btree_block_t *block; /* generic btree block pointer */ + struct xfs_btree_block *block; /* generic btree block pointer */ xfs_buf_t *bp; /* buffer containing block */ block = xfs_btree_get_block(cur, level, &bp); @@ -530,7 +529,7 @@ xfs_btree_firstrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ { - xfs_btree_block_t *block; /* generic btree block pointer */ + struct xfs_btree_block *block; /* generic btree block pointer */ xfs_buf_t *bp; /* buffer containing block */ /* @@ -559,7 +558,7 @@ xfs_btree_lastrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ { - xfs_btree_block_t *block; /* generic btree block pointer */ + struct xfs_btree_block *block; /* generic btree block pointer */ xfs_buf_t *bp; /* buffer containing block */ /* @@ -814,7 +813,7 @@ xfs_btree_setbuf( int lev, /* level in btree */ xfs_buf_t *bp) /* new buffer to set */ { - xfs_btree_block_t *b; /* btree block */ + struct xfs_btree_block *b; /* btree block */ xfs_buf_t *obp; /* old buffer pointer */ obp = cur->bc_bufs[lev]; @@ -1252,20 +1251,20 @@ xfs_btree_log_block( int first; /* first byte offset logged */ int last; /* last byte offset logged */ static const short soffsets[] = { /* table of offsets (short) */ - offsetof(struct xfs_btree_sblock, bb_magic), - offsetof(struct xfs_btree_sblock, bb_level), - offsetof(struct xfs_btree_sblock, bb_numrecs), - offsetof(struct xfs_btree_sblock, bb_leftsib), - offsetof(struct xfs_btree_sblock, bb_rightsib), - sizeof(struct xfs_btree_sblock) + offsetof(struct xfs_btree_block, bb_magic), + offsetof(struct xfs_btree_block, bb_level), + offsetof(struct xfs_btree_block, bb_numrecs), + offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), + offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), + XFS_BTREE_SBLOCK_LEN }; static const short loffsets[] = { /* table of offsets (long) */ - offsetof(struct xfs_btree_lblock, bb_magic), - offsetof(struct xfs_btree_lblock, bb_level), - offsetof(struct xfs_btree_lblock, bb_numrecs), - offsetof(struct xfs_btree_lblock, bb_leftsib), - offsetof(struct xfs_btree_lblock, bb_rightsib), - sizeof(struct xfs_btree_lblock) + offsetof(struct xfs_btree_block, bb_magic), + offsetof(struct xfs_btree_block, bb_level), + offsetof(struct xfs_btree_block, bb_numrecs), + offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), + offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), + XFS_BTREE_LBLOCK_LEN }; XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); @@ -3018,7 +3017,7 @@ xfs_btree_kill_iroot( if (index) { xfs_iroot_realloc(cur->bc_private.b.ip, index, cur->bc_private.b.whichfork); - block = (struct xfs_btree_block *)ifp->if_broot; + block = ifp->if_broot; } be16_add_cpu(&block->bb_numrecs, index); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index d6120a74906..789fffdf8b2 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -39,31 +39,16 @@ extern kmem_zone_t *xfs_btree_cur_zone; #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) /* - * Short form header: space allocation btrees. - */ -typedef struct xfs_btree_sblock { - __be32 bb_magic; /* magic number for block type */ - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ - __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */ - __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */ -} xfs_btree_sblock_t; - -/* - * Long form header: bmap btrees. - */ -typedef struct xfs_btree_lblock { - __be32 bb_magic; /* magic number for block type */ - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ - __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */ - __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */ -} xfs_btree_lblock_t; - -/* - * Combined header and structure, used by common code. + * Generic btree header. + * + * This is a comination of the actual format used on disk for short and long + * format btrees. The first three fields are shared by both format, but + * the pointers are different and should be used with care. + * + * To get the size of the actual short or long form headers please use + * the size macros below. Never use sizeof(xfs_btree_block). */ -typedef struct xfs_btree_block { +struct xfs_btree_block { __be32 bb_magic; /* magic number for block type */ __be16 bb_level; /* 0 is a leaf */ __be16 bb_numrecs; /* current # of data records */ @@ -77,7 +62,11 @@ typedef struct xfs_btree_block { __be64 bb_rightsib; } l; /* long form pointers */ } bb_u; /* rest */ -} xfs_btree_block_t; +}; + +#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ +#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ + /* * Generic key, ptr and record wrapper structures. @@ -294,20 +283,8 @@ typedef struct xfs_btree_cur /* * Convert from buffer to btree block header. */ -#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp)) -#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp)) -#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp)) - +#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp)) -/* - * Check that long form block header is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_lblock *block, /* btree long form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp); /* buffer containing block, if any */ /* * Check that block header is ok. diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 2a00fcc36d8..d7cf392cc85 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -165,7 +165,7 @@ typedef enum xfs_dinode_fmt */ #define XFS_LITINO(mp) ((mp)->m_litino) #define XFS_BROOT_SIZE_ADJ \ - (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t)) + (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) /* * Inode data & attribute fork sizes, per inode. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8ce72aba027..f1d0585041b 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -126,7 +126,7 @@ xfs_growfs_data_private( xfs_extlen_t agsize; xfs_extlen_t tmpsize; xfs_alloc_rec_t *arec; - xfs_btree_sblock_t *block; + struct xfs_btree_block *block; xfs_buf_t *bp; int bucket; int dpct; @@ -251,13 +251,13 @@ xfs_growfs_data_private( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), BTOBB(mp->m_sb.sb_blocksize), 0); - block = XFS_BUF_TO_SBLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); block->bb_level = 0; block->bb_numrecs = cpu_to_be16(1); - block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( @@ -272,13 +272,13 @@ xfs_growfs_data_private( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), BTOBB(mp->m_sb.sb_blocksize), 0); - block = XFS_BUF_TO_SBLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); block->bb_level = 0; block->bb_numrecs = cpu_to_be16(1); - block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( @@ -294,13 +294,13 @@ xfs_growfs_data_private( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), BTOBB(mp->m_sb.sb_blocksize), 0); - block = XFS_BUF_TO_SBLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); block->bb_level = 0; block->bb_numrecs = 0; - block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(mp, bp); if (error) { goto error0; diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 46aabb3fcbf..99f2408e8d8 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -375,7 +375,7 @@ xfs_inobt_maxrecs( int blocklen, int leaf) { - blocklen -= sizeof(struct xfs_btree_sblock); + blocklen -= XFS_INOBT_BLOCK_LEN(mp); if (leaf) return blocklen / sizeof(xfs_inobt_rec_t); diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index fa12c85db34..37e5dd01a57 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -24,7 +24,6 @@ struct xfs_buf; struct xfs_btree_cur; -struct xfs_btree_sblock; struct xfs_mount; /* @@ -70,11 +69,6 @@ typedef struct xfs_inobt_key { /* btree pointer type */ typedef __be32 xfs_inobt_ptr_t; -/* btree block header type */ -typedef struct xfs_btree_sblock xfs_inobt_block_t; - -#define XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)XFS_BUF_PTR(bp)) - /* * Bit manipulations for ir_free. */ @@ -95,6 +89,13 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) #define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) +/* + * Btree block header size depends on a superblock flag. + * + * (not quite yet, but soon) + */ +#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN + /* * Record, key, and pointer address macros for btree blocks. * @@ -103,19 +104,19 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_REC_ADDR(mp, block, index) \ ((xfs_inobt_rec_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_INOBT_BLOCK_LEN(mp) + \ (((index) - 1) * sizeof(xfs_inobt_rec_t)))) #define XFS_INOBT_KEY_ADDR(mp, block, index) \ ((xfs_inobt_key_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_INOBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_inobt_key_t))) #define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_inobt_ptr_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_INOBT_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_inobt_key_t) + \ ((index) - 1) * sizeof(xfs_inobt_ptr_t))) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7b4f13c710d..bc33762abc4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2352,7 +2352,7 @@ xfs_iroot_realloc( struct xfs_mount *mp = ip->i_mount; int cur_max; xfs_ifork_t *ifp; - xfs_bmbt_block_t *new_broot; + struct xfs_btree_block *new_broot; int new_max; size_t new_size; char *np; @@ -2373,8 +2373,7 @@ xfs_iroot_realloc( */ if (ifp->if_broot_bytes == 0) { new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); - ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, - KM_SLEEP); + ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); ifp->if_broot_bytes = (int)new_size; return; } @@ -2388,9 +2387,7 @@ xfs_iroot_realloc( cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); - ifp->if_broot = (xfs_bmbt_block_t *) - kmem_realloc(ifp->if_broot, - new_size, + ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ KM_SLEEP); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -2418,11 +2415,11 @@ xfs_iroot_realloc( else new_size = 0; if (new_size > 0) { - new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); + new_broot = kmem_alloc(new_size, KM_SLEEP); /* * First copy over the btree block header. */ - memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t)); + memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); } else { new_broot = NULL; ifp->if_flags &= ~XFS_IFBROOT; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 55d50b888b6..6fd20fc179a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -63,7 +63,7 @@ typedef struct xfs_ext_irec { typedef struct xfs_ifork { int if_bytes; /* bytes in if_u1 */ int if_real_bytes; /* bytes allocated in if_u1 */ - xfs_bmbt_block_t *if_broot; /* file's incore btree root */ + struct xfs_btree_block *if_broot; /* file's incore btree root */ short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ unsigned char if_ext_max; /* max # of extent records */ @@ -213,7 +213,6 @@ struct ktrace; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; -struct xfs_bmbt_block; struct xfs_inode_log_item; struct xfs_mount; struct xfs_trans; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 23c3a782a9e..199c8ea3647 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2452,8 +2452,8 @@ xlog_recover_do_inode_trans( break; case XFS_ILOG_DBROOT: - xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, - &(dip->di_u.di_bmbt), + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, + &dip->di_u.di_bmbt, XFS_DFORK_DSIZE(dip, mp)); break; @@ -2490,8 +2490,8 @@ xlog_recover_do_inode_trans( case XFS_ILOG_ABROOT: dest = XFS_DFORK_APTR(dip); - xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, - (xfs_bmdr_block_t*)dest, + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, + len, (xfs_bmdr_block_t*)dest, XFS_DFORK_ASIZE(dip, mp)); break; -- cgit v1.2.3 From 4e8938feba770b583fb13d249c17943961731a3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:14:43 +1100 Subject: [XFS] Move XFS_BMAP_SANITY_CHECK out of line. Move the XFS_BMAP_SANITY_CHECK macro out of line and make it a properly typed function. Also pass the xfs_buf for the btree block instead of just the btree block header, as we will need some additional information for it to implement CRC checking of btree blocks. SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32301a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_bmap.c | 22 +++++++++++++++++++--- fs/xfs/xfs_bmap_btree.h | 7 ------- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7796a0c140e..db289050692 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4524,6 +4524,22 @@ xfs_bmap_one_block( return rval; } +STATIC int +xfs_bmap_sanity_check( + struct xfs_mount *mp, + struct xfs_buf *bp, + int level) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || + be16_to_cpu(block->bb_level) != level || + be16_to_cpu(block->bb_numrecs) == 0 || + be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) + return 0; + return 1; +} + /* * Read in the extents to if_extents. * All inode fields are set up by caller, we just traverse the btree @@ -4575,7 +4591,7 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( - XFS_BMAP_SANITY_CHECK(mp, block, level), + xfs_bmap_sanity_check(mp, bp, level), error0); if (level == 0) break; @@ -4611,7 +4627,7 @@ xfs_bmap_read_extents( goto error0; } XFS_WANT_CORRUPTED_GOTO( - XFS_BMAP_SANITY_CHECK(mp, block, 0), + xfs_bmap_sanity_check(mp, bp, 0), error0); /* * Read-ahead the next leaf block, if any. @@ -6287,7 +6303,7 @@ xfs_bmap_check_leaf_extents( goto error_norelse; block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( - XFS_BMAP_SANITY_CHECK(mp, block, level), + xfs_bmap_sanity_check(mp, bp, level), error0); if (level == 0) break; diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 735a42418c9..a4555abb662 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -213,13 +213,6 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; */ #define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) -#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \ - (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \ - be16_to_cpu((bb)->bb_level) == level && \ - be16_to_cpu((bb)->bb_numrecs) > 0 && \ - be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0]) - - /* * Prototypes for xfs_bmap.c to call. */ -- cgit v1.2.3 From 2af75df7be7ca86965bf73766f827575d1c26fbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:14:53 +1100 Subject: [XFS] split out two helpers from xfs_syncsub Split out two helpers from xfs_syncsub for the dummy log commit and the superblock writeout. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32303a Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_sync.c | 162 +++++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 69 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 53d85ecb1d5..59da3327a6b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -315,6 +315,93 @@ xfs_sync_inodes( return XFS_ERROR(last_error); } +STATIC int +xfs_commit_dummy_trans( + struct xfs_mount *mp, + uint log_flags) +{ + struct xfs_inode *ip = mp->m_rootip; + struct xfs_trans *tp; + int error; + + /* + * Put a dummy transaction in the log to tell recovery + * that all others are OK. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + /* XXX(hch): ignoring the error here.. */ + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + xfs_log_force(mp, 0, log_flags); + return 0; +} + +STATIC int +xfs_sync_fsdata( + struct xfs_mount *mp, + int flags) +{ + struct xfs_buf *bp; + struct xfs_buf_log_item *bip; + int error = 0; + + /* + * If this is xfssyncd() then only sync the superblock if we can + * lock it without sleeping and it is not pinned. + */ + if (flags & SYNC_BDFLUSH) { + ASSERT(!(flags & SYNC_WAIT)); + + bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + if (!bp) + goto out; + + bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *); + if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp)) + goto out_brelse; + } else { + bp = xfs_getsb(mp, 0); + + /* + * If the buffer is pinned then push on the log so we won't + * get stuck waiting in the write for someone, maybe + * ourselves, to flush the log. + * + * Even though we just pushed the log above, we did not have + * the superblock buffer locked at that point so it can + * become pinned in between there and here. + */ + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, 0, XFS_LOG_FORCE); + } + + + if (flags & SYNC_WAIT) + XFS_BUF_UNASYNC(bp); + else + XFS_BUF_ASYNC(bp); + + return xfs_bwrite(mp, bp); + + out_brelse: + xfs_buf_relse(bp); + out: + return error; +} + /* * xfs sync routine for internal use * @@ -331,8 +418,6 @@ xfs_syncsub( int error = 0; int last_error = 0; uint log_flags = XFS_LOG_FORCE; - xfs_buf_t *bp; - xfs_buf_log_item_t *bip; /* * Sync out the log. This ensures that the log is periodically @@ -355,83 +440,22 @@ xfs_syncsub( * log activity, so if this isn't vfs_sync() then flush * the log again. */ - if (flags & SYNC_DELWRI) { - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, log_flags); if (flags & SYNC_FSDATA) { - /* - * If this is vfs_sync() then only sync the superblock - * if we can lock it without sleeping and it is not pinned. - */ - if (flags & SYNC_BDFLUSH) { - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); - if (bp != NULL) { - bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - if ((bip != NULL) && - xfs_buf_item_dirty(bip)) { - if (!(XFS_BUF_ISPINNED(bp))) { - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } else { - xfs_buf_relse(bp); - } - } else { - xfs_buf_relse(bp); - } - } - } else { - bp = xfs_getsb(mp, 0); - /* - * If the buffer is pinned then push on the log so - * we won't get stuck waiting in the write for - * someone, maybe ourselves, to flush the log. - * Even though we just pushed the log above, we - * did not have the superblock buffer locked at - * that point so it can become pinned in between - * there and here. - */ - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - if (flags & SYNC_WAIT) - XFS_BUF_UNASYNC(bp); - else - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } - if (error) { + error = xfs_sync_fsdata(mp, flags); + if (error) last_error = error; - } } /* * Now check to see if the log needs a "dummy" transaction. */ if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { - xfs_trans_t *tp; - xfs_inode_t *ip; - - /* - * Put a dummy transaction in the log to tell - * recovery that all others are OK. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - if ((error = xfs_trans_reserve(tp, 0, - XFS_ICHANGE_LOG_RES(mp), - 0, 0, 0))) { - xfs_trans_cancel(tp, 0); + error = xfs_commit_dummy_trans(mp, log_flags); + if (error) return error; - } - - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); } /* -- cgit v1.2.3 From bc60a99323b3ec628273b5fa998285c87d464ca6 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:03 +1100 Subject: [XFS] Use struct inodes instead of vnodes to kill vn_grab With the sync code relocated to the linux-2.6 directory we can use struct inodes directly. If we do the same thing for the quota release code, we can remove vn_grab altogether. While here, convert the VN_BAD() checks to is_bad_inode() so we can remove vnodes entirely from this code. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32304a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 53 +++++++++++++++++++++--------------------- fs/xfs/linux-2.6/xfs_vnode.c | 6 ++--- fs/xfs/linux-2.6/xfs_vnode.h | 5 ---- fs/xfs/quota/xfs_qm_syscalls.c | 16 ++++++------- 4 files changed, 37 insertions(+), 43 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 59da3327a6b..461c1dc35d3 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -131,10 +131,7 @@ xfs_sync_inodes_ag( int flags, int *bypassed) { - xfs_inode_t *ip = NULL; - struct inode *vp = NULL; xfs_perag_t *pag = &mp->m_perag[ag]; - boolean_t vnode_refed = B_FALSE; int nr_found; int first_index = 0; int error = 0; @@ -156,6 +153,10 @@ xfs_sync_inodes_ag( } do { + struct inode *inode; + boolean_t inode_refed; + xfs_inode_t *ip = NULL; + /* * use a gang lookup to find the next inode in the tree * as the tree is sparse and a gang lookup walks to find @@ -177,14 +178,14 @@ xfs_sync_inodes_ag( * skip inodes in reclaim. Let xfs_syncsub do that for * us so we don't need to worry. */ - vp = VFS_I(ip); - if (!vp) { + if (xfs_iflags_test(ip, (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { read_unlock(&pag->pag_ici_lock); continue; } /* bad inodes are dealt with elsewhere */ - if (VN_BAD(vp)) { + inode = VFS_I(ip); + if (is_bad_inode(inode)) { read_unlock(&pag->pag_ici_lock); continue; } @@ -196,30 +197,29 @@ xfs_sync_inodes_ag( } /* - * The inode lock here actually coordinates with the almost - * spurious inode lock in xfs_ireclaim() to prevent the vnode - * we handle here without a reference from being freed while we - * reference it. If we lock the inode while it's on the mount - * list here, then the spurious inode lock in xfs_ireclaim() - * after the inode is pulled from the mount list will sleep - * until we release it here. This keeps the vnode from being - * freed while we reference it. + * If we can't get a reference on the VFS_I, the inode must be + * in reclaim. If we can get the inode lock without blocking, + * it is safe to flush the inode because we hold the tree lock + * and xfs_iextract will block right now. Hence if we lock the + * inode while holding the tree lock, xfs_ireclaim() is + * guaranteed to block on the inode lock we now hold and hence + * it is safe to reference the inode until we drop the inode + * locks completely. */ - if (xfs_ilock_nowait(ip, lock_flags) == 0) { - vp = vn_grab(vp); + inode_refed = B_FALSE; + if (igrab(inode)) { read_unlock(&pag->pag_ici_lock); - if (!vp) - continue; xfs_ilock(ip, lock_flags); - - ASSERT(vp == VFS_I(ip)); - ASSERT(ip->i_mount == mp); - - vnode_refed = B_TRUE; + inode_refed = B_TRUE; } else { - /* safe to unlock here as we have a reference */ + if (!xfs_ilock_nowait(ip, lock_flags)) { + /* leave it to reclaim */ + read_unlock(&pag->pag_ici_lock); + continue; + } read_unlock(&pag->pag_ici_lock); } + /* * If we have to flush data or wait for I/O completion * we need to drop the ilock that we currently hold. @@ -240,7 +240,7 @@ xfs_sync_inodes_ag( xfs_ilock(ip, XFS_ILOCK_SHARED); } - if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { + if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) @@ -268,9 +268,8 @@ xfs_sync_inodes_ag( if (lock_flags) xfs_iunlock(ip, lock_flags); - if (vnode_refed) { + if (inode_refed) { IRELE(ip); - vnode_refed = B_FALSE; } if (error) diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index b52528bbbff..dceb6dbaa2d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -90,10 +90,10 @@ vn_ioerror( */ static inline int xfs_icount(struct xfs_inode *ip) { - struct inode *vp = VFS_I(ip); + struct inode *inode = VFS_I(ip); - if (vp) - return vn_count(vp); + if (!inode) + return atomic_read(&inode->i_count); return -1; } diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 683ce16210f..bf89e41c3b8 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -80,11 +80,6 @@ do { \ iput(VFS_I(ip)); \ } while (0) -static inline struct inode *vn_grab(struct inode *vp) -{ - return igrab(vp); -} - /* * Dealing with bad inodes */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 26152b9ccc6..4254b074223 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1031,13 +1031,13 @@ xfs_qm_dqrele_inodes_ag( uint flags) { xfs_inode_t *ip = NULL; - struct inode *vp = NULL; xfs_perag_t *pag = &mp->m_perag[ag]; int first_index = 0; int nr_found; do { - boolean_t vnode_refd = B_FALSE; + boolean_t inode_refed; + struct inode *inode; /* * use a gang lookup to find the next inode in the tree @@ -1057,19 +1057,19 @@ xfs_qm_dqrele_inodes_ag( first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); /* skip quota inodes and those in reclaim */ - vp = VFS_I(ip); - if (!vp || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + inode = VFS_I(ip); + if (!inode || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); read_unlock(&pag->pag_ici_lock); continue; } if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - vp = vn_grab(vp); + inode = igrab(inode); read_unlock(&pag->pag_ici_lock); - if (!vp) + if (!inode) continue; - vnode_refd = B_TRUE; + inode_refed = B_TRUE; xfs_ilock(ip, XFS_ILOCK_EXCL); } else { read_unlock(&pag->pag_ici_lock); @@ -1084,7 +1084,7 @@ xfs_qm_dqrele_inodes_ag( ip->i_gdquot = NULL; } xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (vnode_refd) + if (inode_refed) IRELE(ip); } while (nr_found); } -- cgit v1.2.3 From 2030b5aba8a4bcaca5aca85968514fa58207d3bd Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:12 +1100 Subject: [XFS] use xfs_sync_inodes rather than xfs_syncsub Kill the unused arg in xfs_syncsub() and xfs_sync_inodes(). For callers of xfs_syncsub() that only want to flush inodes, replace xfs_syncsub() with direct calls to xfs_sync_inodes() as that is all that is being done with the specific flags being passed in. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32305a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 21 +++++++-------------- fs/xfs/linux-2.6/xfs_sync.h | 2 +- fs/xfs/quota/xfs_qm_syscalls.c | 2 +- fs/xfs/xfs_mount.h | 2 -- fs/xfs/xfs_vfsops.c | 14 +++++++------- 5 files changed, 16 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 461c1dc35d3..7e9fb5251b2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -117,7 +117,7 @@ xfs_sync( if (flags & SYNC_IOWAIT) xfs_filestream_flush(mp); - return xfs_syncsub(mp, flags, NULL); + return xfs_syncsub(mp, flags); } /* @@ -128,8 +128,7 @@ STATIC int xfs_sync_inodes_ag( xfs_mount_t *mp, int ag, - int flags, - int *bypassed) + int flags) { xfs_perag_t *pag = &mp->m_perag[ag]; int nr_found; @@ -260,8 +259,6 @@ xfs_sync_inodes_ag( error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); else xfs_ifunlock(ip); - } else if (bypassed) { - (*bypassed)++; } } @@ -288,15 +285,12 @@ xfs_sync_inodes_ag( int xfs_sync_inodes( xfs_mount_t *mp, - int flags, - int *bypassed) + int flags) { int error; int last_error; int i; - if (bypassed) - *bypassed = 0; if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; error = 0; @@ -305,7 +299,7 @@ xfs_sync_inodes( for (i = 0; i < mp->m_sb.sb_agcount; i++) { if (!mp->m_perag[i].pag_ici_init) continue; - error = xfs_sync_inodes_ag(mp, i, flags, bypassed); + error = xfs_sync_inodes_ag(mp, i, flags); if (error) last_error = error; if (error == EFSCORRUPTED) @@ -408,11 +402,10 @@ xfs_sync_fsdata( * interface as explained above under xfs_sync. * */ -int +STATIC int xfs_syncsub( xfs_mount_t *mp, - int flags, - int *bypassed) + int flags) { int error = 0; int last_error = 0; @@ -431,7 +424,7 @@ xfs_syncsub( if (flags & SYNC_BDFLUSH) xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); else - error = xfs_sync_inodes(mp, flags, bypassed); + error = xfs_sync_inodes(mp, flags); } /* diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 3746d153ec8..29548619940 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -55,7 +55,7 @@ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync(struct xfs_mount *mp, int flags); -int xfs_syncsub(struct xfs_mount *mp, int flags, int *bypassed); +int xfs_sync_inodes(struct xfs_mount *mp, int flags); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 4254b074223..9ff28e6c5b8 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -127,7 +127,7 @@ xfs_qm_quotactl( break; case Q_XQUOTASYNC: - return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL)); + return xfs_sync_inodes(mp, SYNC_DELWRI); default: break; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0ba05269112..4e62802b6ab 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -523,8 +523,6 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern int xfs_fs_writable(xfs_mount_t *); -extern int xfs_syncsub(xfs_mount_t *, int, int *); -extern int xfs_sync_inodes(xfs_mount_t *, int, int *); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 0c5ee5ec7ee..d5396d6f517 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -68,15 +68,15 @@ xfs_quiesce_fs( xfs_flush_buftarg(mp->m_ddev_targp, 0); xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); - /* This loop must run at least twice. - * The first instance of the loop will flush - * most meta data but that will generate more - * meta data (typically directory updates). - * Which then must be flushed and logged before - * we can write the unmount record. + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. */ do { - xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL); + xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_sync_inodes(mp, SYNC_INODE_QUIESCE); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); -- cgit v1.2.3 From dfd837a9eb79de4e50323a6f4e1ad8138d806cb7 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:21 +1100 Subject: [XFS] kill xfs_syncsub Now that the only caller is xfs_sync(), merge the two together as it makes no sense to keep them separate. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32306a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 141 +++++++++++++++++++------------------------- 1 file changed, 62 insertions(+), 79 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 7e9fb5251b2..d4b7b21a6e5 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -47,79 +47,6 @@ #include #include -/* - * xfs_sync flushes any pending I/O to file system vfsp. - * - * This routine is called by vfs_sync() to make sure that things make it - * out to disk eventually, on sync() system calls to flush out everything, - * and when the file system is unmounted. For the vfs_sync() case, all - * we really need to do is sync out the log to make all of our meta-data - * updates permanent (except for timestamps). For calls from pflushd(), - * dirty pages are kept moving by calling pdflush() on the inodes - * containing them. We also flush the inodes that we can lock without - * sleeping and the superblock if we can lock it without sleeping from - * vfs_sync() so that items at the tail of the log are always moving out. - * - * Flags: - * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want - * to sleep if we can help it. All we really need - * to do is ensure that the log is synced at least - * periodically. We also push the inodes and - * superblock if we can lock them without sleeping - * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not - * set, then we really want to lock each inode and flush - * it. - * SYNC_WAIT - All the flushes that take place in this call should - * be synchronous. - * SYNC_DELWRI - This tells us to push dirty pages associated with - * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to - * determine if they should be flushed sync, async, or - * delwri. - * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everything. - * SYNC_FSDATA - This indicates that the caller would like to make - * sure the superblock is safe on disk. We can ensure - * this by simply making sure the log gets flushed - * if SYNC_BDFLUSH is set, and by actually writing it - * out otherwise. - * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete - * before we return (including direct I/O). Forms the drain - * side of the write barrier needed to safely quiesce the - * filesystem. - * - */ -int -xfs_sync( - xfs_mount_t *mp, - int flags) -{ - int error; - - /* - * Get the Quota Manager to flush the dquots. - * - * If XFS quota support is not enabled or this filesystem - * instance does not use quotas XFS_QM_DQSYNC will always - * return zero. - */ - error = XFS_QM_DQSYNC(mp, flags); - if (error) { - /* - * If we got an IO error, we will be shutting down. - * So, there's nothing more for us to do here. - */ - ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(error); - } - - if (flags & SYNC_IOWAIT) - xfs_filestream_flush(mp); - - return xfs_syncsub(mp, flags); -} - /* * Sync all the inodes in the given AG according to the * direction given by the flags. @@ -396,21 +323,77 @@ xfs_sync_fsdata( } /* - * xfs sync routine for internal use + * xfs_sync flushes any pending I/O to file system vfsp. * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. + * This routine is called by vfs_sync() to make sure that things make it + * out to disk eventually, on sync() system calls to flush out everything, + * and when the file system is unmounted. For the vfs_sync() case, all + * we really need to do is sync out the log to make all of our meta-data + * updates permanent (except for timestamps). For calls from pflushd(), + * dirty pages are kept moving by calling pdflush() on the inodes + * containing them. We also flush the inodes that we can lock without + * sleeping and the superblock if we can lock it without sleeping from + * vfs_sync() so that items at the tail of the log are always moving out. + * + * Flags: + * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want + * to sleep if we can help it. All we really need + * to do is ensure that the log is synced at least + * periodically. We also push the inodes and + * superblock if we can lock them without sleeping + * and they are not pinned. + * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not + * set, then we really want to lock each inode and flush + * it. + * SYNC_WAIT - All the flushes that take place in this call should + * be synchronous. + * SYNC_DELWRI - This tells us to push dirty pages associated with + * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to + * determine if they should be flushed sync, async, or + * delwri. + * SYNC_CLOSE - This flag is passed when the system is being + * unmounted. We should sync and invalidate everything. + * SYNC_FSDATA - This indicates that the caller would like to make + * sure the superblock is safe on disk. We can ensure + * this by simply making sure the log gets flushed + * if SYNC_BDFLUSH is set, and by actually writing it + * out otherwise. + * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete + * before we return (including direct I/O). Forms the drain + * side of the write barrier needed to safely quiesce the + * filesystem. * */ -STATIC int -xfs_syncsub( +int +xfs_sync( xfs_mount_t *mp, int flags) { - int error = 0; + int error; int last_error = 0; uint log_flags = XFS_LOG_FORCE; + /* + * Get the Quota Manager to flush the dquots. + * + * If XFS quota support is not enabled or this filesystem + * instance does not use quotas XFS_QM_DQSYNC will always + * return zero. + */ + error = XFS_QM_DQSYNC(mp, flags); + if (error) { + /* + * If we got an IO error, we will be shutting down. + * So, there's nothing more for us to do here. + */ + ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(error); + } + + if (flags & SYNC_IOWAIT) + xfs_filestream_flush(mp); + /* * Sync out the log. This ensures that the log is periodically * flushed even if there is not enough activity to fill it up. -- cgit v1.2.3 From aacaa880bfac8fecd44b279a49688643890358f5 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:29 +1100 Subject: [XFS] xfssyncd: don't call xfs_sync Start de-multiplexing xfs_sync() by making xfs_sync_worker() call the specific sync functions it needs. This is only a small, unique subset of the entire xfs_sync() code so is easier to follow. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32307a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d4b7b21a6e5..3c31137cdc7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -526,6 +526,11 @@ xfs_flush_device( xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } +/* + * Every sync period we need to unpin all items, reclaim inodes, sync + * quota and write out the superblock. We might need to cover the log + * to indicate it is idle. + */ STATIC void xfs_sync_worker( struct xfs_mount *mp, @@ -533,8 +538,15 @@ xfs_sync_worker( { int error; - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) - error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + /* dgc: errors ignored here */ + error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); + } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); } -- cgit v1.2.3 From be97d9d5577f6c8a36588e2f262c772c5422b128 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:38 +1100 Subject: [XFS] make SYNC_ATTR no longer use xfs_sync Continue to de-multiplex xfs_sync be replacing all SYNC_ATTR callers with direct calls xfs_sync_inodes(). Add an assert into xfs_sync() to ensure we caught all the SYNC_ATTR callers. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32308a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 3 ++- fs/xfs/linux-2.6/xfs_sync.c | 23 +++++++++++------------ fs/xfs/linux-2.6/xfs_sync.h | 1 - fs/xfs/xfs_vfsops.c | 2 +- 4 files changed, 14 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 767f38e0e0b..38d380a7ca2 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -998,7 +998,8 @@ xfs_fs_put_super( int error; xfs_syncd_stop(mp); - xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); + xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3c31137cdc7..002ccb6f0cb 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -342,9 +342,8 @@ xfs_sync_fsdata( * periodically. We also push the inodes and * superblock if we can lock them without sleeping * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not - * set, then we really want to lock each inode and flush - * it. + * SYNC_ATTR - We need to flush the inodes. Now handled by direct calls + * to xfs_sync_inodes(). * SYNC_WAIT - All the flushes that take place in this call should * be synchronous. * SYNC_DELWRI - This tells us to push dirty pages associated with @@ -373,6 +372,8 @@ xfs_sync( int last_error = 0; uint log_flags = XFS_LOG_FORCE; + ASSERT(!(flags & SYNC_ATTR)); + /* * Get the Quota Manager to flush the dquots. * @@ -403,20 +404,18 @@ xfs_sync( xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - if (flags & (SYNC_ATTR|SYNC_DELWRI)) { + if (flags & SYNC_DELWRI) { if (flags & SYNC_BDFLUSH) xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); else error = xfs_sync_inodes(mp, flags); - } - - /* - * Flushing out dirty data above probably generated more - * log activity, so if this isn't vfs_sync() then flush - * the log again. - */ - if (flags & SYNC_DELWRI) + /* + * Flushing out dirty data above probably generated more + * log activity, so if this isn't vfs_sync() then flush + * the log again. + */ xfs_log_force(mp, 0, log_flags); + } if (flags & SYNC_FSDATA) { error = xfs_sync_fsdata(mp, flags); diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 29548619940..5316915c083 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -49,7 +49,6 @@ typedef struct bhv_vfs_sync_work { * to disk (this is the main difference between a sync and a quiesce). */ #define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) -#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index d5396d6f517..c82b9555959 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -76,7 +76,7 @@ xfs_quiesce_fs( */ do { xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); - xfs_sync_inodes(mp, SYNC_INODE_QUIESCE); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); -- cgit v1.2.3 From e9f1c6ee12955fd8657f6f0f9a3d09112b1f1fdd Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:50 +1100 Subject: [XFS] make SYNC_DELWRI no longer use xfs_sync Continue to de-multiplex xfs_sync be replacing all SYNC_DELWRI callers with direct calls functions that do the work. Isolate the data quiesce case to a function in xfs_sync.c. Isolate the FSDATA case with explicit calls to xfs_sync_fsdata(). Version 2: o Push delwri related log forces into xfs_sync_inodes(). SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32309a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 25 ++++++------------------- fs/xfs/linux-2.6/xfs_sync.c | 42 +++++++++++++++++++++++++++++++++++++++++- fs/xfs/linux-2.6/xfs_sync.h | 3 +++ fs/xfs/xfs_vfsops.c | 1 - 4 files changed, 50 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 38d380a7ca2..376f32d9247 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -998,7 +998,6 @@ xfs_fs_put_super( int error; xfs_syncd_stop(mp); - xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI @@ -1057,7 +1056,7 @@ xfs_fs_write_super( struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) - xfs_sync(XFS_M(sb), SYNC_FSDATA); + xfs_sync_fsdata(XFS_M(sb), 0); sb->s_dirt = 0; } @@ -1068,7 +1067,6 @@ xfs_fs_sync_super( { struct xfs_mount *mp = XFS_M(sb); int error; - int flags; /* * Treat a sync operation like a freeze. This is to work @@ -1082,20 +1080,10 @@ xfs_fs_sync_super( * dirty the Linux inode until after the transaction I/O * completes. */ - if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) { - /* - * First stage of freeze - no more writers will make progress - * now we are here, so we flush delwri and delalloc buffers - * here, then wait for all I/O to complete. Data is frozen at - * that point. Metadata is not frozen, transactions can still - * occur here so don't bother flushing the buftarg (i.e - * SYNC_QUIESCE) because it'll just get dirty again. - */ - flags = SYNC_DATA_QUIESCE; - } else - flags = SYNC_FSDATA; - - error = xfs_sync(mp, flags); + if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) + error = xfs_quiesce_data(mp); + else + error = xfs_sync_fsdata(mp, 0); sb->s_dirt = 0; if (unlikely(laptop_mode)) { @@ -1233,8 +1221,7 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { - xfs_filestream_flush(mp); - xfs_sync(mp, SYNC_DATA_QUIESCE); + xfs_quiesce_data(mp); xfs_attr_quiesce(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 002ccb6f0cb..838070ce724 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -217,12 +217,16 @@ xfs_sync_inodes( int error; int last_error; int i; + int lflags = XFS_LOG_FORCE; if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; error = 0; last_error = 0; + if (flags & SYNC_WAIT) + lflags |= XFS_LOG_SYNC; + for (i = 0; i < mp->m_sb.sb_agcount; i++) { if (!mp->m_perag[i].pag_ici_init) continue; @@ -232,6 +236,9 @@ xfs_sync_inodes( if (error == EFSCORRUPTED) break; } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, lflags); + return XFS_ERROR(last_error); } @@ -269,7 +276,7 @@ xfs_commit_dummy_trans( return 0; } -STATIC int +int xfs_sync_fsdata( struct xfs_mount *mp, int flags) @@ -322,6 +329,39 @@ xfs_sync_fsdata( return error; } +/* + * First stage of freeze - no more writers will make progress now we are here, + * so we flush delwri and delalloc buffers here, then wait for all I/O to + * complete. Data is frozen at that point. Metadata is not frozen, + * transactions can still occur here so don't bother flushing the buftarg (i.e + * SYNC_QUIESCE) because it'll just get dirty again. + */ +int +xfs_quiesce_data( + struct xfs_mount *mp) +{ + int error; + + /* push non-blocking */ + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); + XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + xfs_filestream_flush(mp); + + /* push and block */ + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); + XFS_QM_DQSYNC(mp, SYNC_WAIT); + + /* write superblock and hoover shutdown errors */ + error = xfs_sync_fsdata(mp, 0); + + /* flush devices */ + XFS_bflush(mp->m_ddev_targp); + if (mp->m_rtdev_targp) + XFS_bflush(mp->m_rtdev_targp); + + return error; +} + /* * xfs_sync flushes any pending I/O to file system vfsp. * diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 5316915c083..fcd4040c9ad 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -55,6 +55,9 @@ void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync(struct xfs_mount *mp, int flags); int xfs_sync_inodes(struct xfs_mount *mp, int flags); +int xfs_sync_fsdata(struct xfs_mount *mp, int flags); + +int xfs_quiesce_data(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index c82b9555959..b55a9bb3a6e 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -75,7 +75,6 @@ xfs_quiesce_fs( * logged before we can write the unmount record. */ do { - xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { -- cgit v1.2.3 From cb56a4b995d44b7990ca3acd18db571eedd0649f Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:16:00 +1100 Subject: [XFS] Kill SYNC_CLOSE SYNC_CLOSE is only ever used and checked in conjunction with SYNC_WAIT, and this only done in one spot. The only thing this does is make XFS_bflush() calls to the data buftargs. This will happen very shortly afterwards the xfs_sync() call anyway in the unmount path via the xfs_close_devices(), so this code is redundant and can be removed. That only user of SYNC_CLOSE is now gone, so kill the flag completely. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32310a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 10 ---------- fs/xfs/linux-2.6/xfs_sync.c | 31 ++----------------------------- fs/xfs/linux-2.6/xfs_sync.h | 1 - 3 files changed, 2 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 376f32d9247..60ecf47b9f0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1026,16 +1026,6 @@ xfs_fs_put_super( error = xfs_unmount_flush(mp, 0); WARN_ON(error); - /* - * If we're forcing a shutdown, typically because of a media error, - * we want to make sure we invalidate dirty pages that belong to - * referenced vnodes as well. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE); - ASSERT(error != EFSCORRUPTED); - } - if (mp->m_flags & XFS_MOUNT_DMAPI) { XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, unmount_event_flags); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 838070ce724..91a54a79a09 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -70,7 +70,7 @@ xfs_sync_inodes_ag( if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ - if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { + if (flags & SYNC_DELWRI) { /* * We need the I/O lock if we're going to call any of * the flush/inval routines. @@ -117,7 +117,7 @@ xfs_sync_inodes_ag( } /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { + if (XFS_FORCED_SHUTDOWN(mp)) { read_unlock(&pag->pag_ici_lock); return 0; } @@ -152,20 +152,6 @@ xfs_sync_inodes_ag( * If we need to drop the lock, insert a marker if we * have not already done so. */ - if (flags & SYNC_CLOSE) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (XFS_FORCED_SHUTDOWN(mp)) - xfs_tosspages(ip, 0, -1, FI_REMAPF); - else - error = xfs_flushinval_pages(ip, 0, -1, - FI_REMAPF); - /* wait for I/O on freeze */ - if (flags & SYNC_IOWAIT) - vn_iowait(ip); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); @@ -390,8 +376,6 @@ xfs_quiesce_data( * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to * determine if they should be flushed sync, async, or * delwri. - * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everything. * SYNC_FSDATA - This indicates that the caller would like to make * sure the superblock is safe on disk. We can ensure * this by simply making sure the log gets flushed @@ -472,17 +456,6 @@ xfs_sync( return error; } - /* - * When shutting down, we need to insure that the AIL is pushed - * to disk or the filesystem can appear corrupt from the PROM. - */ - if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { - XFS_bflush(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - XFS_bflush(mp->m_rtdev_targp); - } - } - return XFS_ERROR(last_error); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fcd4040c9ad..2509db021f7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -28,7 +28,6 @@ typedef struct bhv_vfs_sync_work { } bhv_vfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_CLOSE 0x0002 /* close file system down */ #define SYNC_DELWRI 0x0004 /* look at delayed writes */ #define SYNC_WAIT 0x0008 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -- cgit v1.2.3 From a4e4c4f4a8f9376158f8181a75285091f52a79e3 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:16:11 +1100 Subject: [XFS] Kill xfs_sync() There are no more callers to xfs_sync() now, so remove the function altogther. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32311a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 132 +++++--------------------------------------- fs/xfs/linux-2.6/xfs_sync.h | 25 ++------- fs/xfs/quota/xfs_qm.c | 10 +--- fs/xfs/xfs_iget.c | 15 +++-- 4 files changed, 29 insertions(+), 153 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 91a54a79a09..ed24435af65 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -316,11 +316,21 @@ xfs_sync_fsdata( } /* - * First stage of freeze - no more writers will make progress now we are here, + * When remounting a filesystem read-only or freezing the filesystem, we have + * two phases to execute. This first phase is syncing the data before we + * quiesce the filesystem, and the second is flushing all the inodes out after + * we've waited for all the transactions created by the first phase to + * complete. The second phase ensures that the inodes are written to their + * location on disk rather than just existing in transactions in the log. This + * means after a quiesce there is no log replay required to write the inodes to + * disk (this is the main difference between a sync and a quiesce). + */ +/* + * First stage of freeze - no writers will make progress now we are here, * so we flush delwri and delalloc buffers here, then wait for all I/O to * complete. Data is frozen at that point. Metadata is not frozen, - * transactions can still occur here so don't bother flushing the buftarg (i.e - * SYNC_QUIESCE) because it'll just get dirty again. + * transactions can still occur here so don't bother flushing the buftarg + * because it'll just get dirty again. */ int xfs_quiesce_data( @@ -337,128 +347,16 @@ xfs_quiesce_data( xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); XFS_QM_DQSYNC(mp, SYNC_WAIT); - /* write superblock and hoover shutdown errors */ + /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, 0); - /* flush devices */ - XFS_bflush(mp->m_ddev_targp); + /* flush data-only devices */ if (mp->m_rtdev_targp) XFS_bflush(mp->m_rtdev_targp); return error; } -/* - * xfs_sync flushes any pending I/O to file system vfsp. - * - * This routine is called by vfs_sync() to make sure that things make it - * out to disk eventually, on sync() system calls to flush out everything, - * and when the file system is unmounted. For the vfs_sync() case, all - * we really need to do is sync out the log to make all of our meta-data - * updates permanent (except for timestamps). For calls from pflushd(), - * dirty pages are kept moving by calling pdflush() on the inodes - * containing them. We also flush the inodes that we can lock without - * sleeping and the superblock if we can lock it without sleeping from - * vfs_sync() so that items at the tail of the log are always moving out. - * - * Flags: - * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want - * to sleep if we can help it. All we really need - * to do is ensure that the log is synced at least - * periodically. We also push the inodes and - * superblock if we can lock them without sleeping - * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. Now handled by direct calls - * to xfs_sync_inodes(). - * SYNC_WAIT - All the flushes that take place in this call should - * be synchronous. - * SYNC_DELWRI - This tells us to push dirty pages associated with - * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to - * determine if they should be flushed sync, async, or - * delwri. - * SYNC_FSDATA - This indicates that the caller would like to make - * sure the superblock is safe on disk. We can ensure - * this by simply making sure the log gets flushed - * if SYNC_BDFLUSH is set, and by actually writing it - * out otherwise. - * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete - * before we return (including direct I/O). Forms the drain - * side of the write barrier needed to safely quiesce the - * filesystem. - * - */ -int -xfs_sync( - xfs_mount_t *mp, - int flags) -{ - int error; - int last_error = 0; - uint log_flags = XFS_LOG_FORCE; - - ASSERT(!(flags & SYNC_ATTR)); - - /* - * Get the Quota Manager to flush the dquots. - * - * If XFS quota support is not enabled or this filesystem - * instance does not use quotas XFS_QM_DQSYNC will always - * return zero. - */ - error = XFS_QM_DQSYNC(mp, flags); - if (error) { - /* - * If we got an IO error, we will be shutting down. - * So, there's nothing more for us to do here. - */ - ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(error); - } - - if (flags & SYNC_IOWAIT) - xfs_filestream_flush(mp); - - /* - * Sync out the log. This ensures that the log is periodically - * flushed even if there is not enough activity to fill it up. - */ - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; - - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - - if (flags & SYNC_DELWRI) { - if (flags & SYNC_BDFLUSH) - xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); - else - error = xfs_sync_inodes(mp, flags); - /* - * Flushing out dirty data above probably generated more - * log activity, so if this isn't vfs_sync() then flush - * the log again. - */ - xfs_log_force(mp, 0, log_flags); - } - - if (flags & SYNC_FSDATA) { - error = xfs_sync_fsdata(mp, flags); - if (error) - last_error = error; - } - - /* - * Now check to see if the log needs a "dummy" transaction. - */ - if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { - error = xfs_commit_dummy_trans(mp, log_flags); - if (error) - return error; - } - - return XFS_ERROR(last_error); -} - /* * Enqueue a work item to be picked up by the vfs xfssyncd thread. * Doing this has two advantages: diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2509db021f7..4591dc0c788 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -28,31 +28,14 @@ typedef struct bhv_vfs_sync_work { } bhv_vfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_DELWRI 0x0004 /* look at delayed writes */ -#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ -#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ -#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ - -/* - * When remounting a filesystem read-only or freezing the filesystem, - * we have two phases to execute. This first phase is syncing the data - * before we quiesce the fielsystem, and the second is flushing all the - * inodes out after we've waited for all the transactions created by - * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE - * to ensure that the inodes are written to their location on disk - * rather than just existing in transactions in the log. This means - * after a quiesce there is no log replay required to write the inodes - * to disk (this is the main difference between a sync and a quiesce). - */ -#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) +#define SYNC_DELWRI 0x0002 /* look at delayed writes */ +#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ +#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync(struct xfs_mount *mp, int flags); int xfs_sync_inodes(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 270f775974e..db1986a205a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -987,14 +987,10 @@ xfs_qm_dqdetach( } /* - * This is called by VFS_SYNC and flags arg determines the caller, - * and its motives, as done in xfs_sync. - * - * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31 - * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25 - * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA + * This is called to sync quotas. We can be told to use non-blocking + * semantics by either the SYNC_BDFLUSH flag or the absence of the + * SYNC_WAIT flag. */ - int xfs_qm_sync( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 1256746b249..58865fe4780 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -431,14 +431,13 @@ xfs_ireclaim(xfs_inode_t *ip) xfs_iextract(ip); /* - * Here we do a spurious inode lock in order to coordinate with - * xfs_sync(). This is because xfs_sync() references the inodes - * in the mount list without taking references on the corresponding - * vnodes. We make that OK here by ensuring that we wait until - * the inode is unlocked in xfs_sync() before we go ahead and - * free it. We get both the regular lock and the io lock because - * the xfs_sync() code may need to drop the regular one but will - * still hold the io lock. + * Here we do a spurious inode lock in order to coordinate with inode + * cache radix tree lookups. This is because the lookup can reference + * the inodes in the cache without taking references. We make that OK + * here by ensuring that we wait until the inode is unlocked after the + * lookup before we go ahead and free it. We get both the ilock and + * the iolock because the code may need to drop the ilock one but will + * still hold the iolock. */ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -- cgit v1.2.3 From 76bf105cb16da6c847a13a3c77dc962ba1081713 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:16:21 +1100 Subject: [XFS] Move remaining quiesce code. With all the other filesystem sync code it in xfs_sync.c including the data quiesce code, it makes sense to move the remaining quiesce code to the same place. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32312a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 6 ++--- fs/xfs/linux-2.6/xfs_sync.c | 55 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 1 + fs/xfs/xfs_vfsops.c | 55 -------------------------------------------- fs/xfs/xfs_vfsops.h | 1 - 5 files changed, 59 insertions(+), 59 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 60ecf47b9f0..15fb262ef86 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1212,7 +1212,7 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { xfs_quiesce_data(mp); - xfs_attr_quiesce(mp); + xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1221,7 +1221,7 @@ xfs_fs_remount( /* * Second stage of a freeze. The data is already frozen so we only - * need to take care of themetadata. Once that's done write a dummy + * need to take care of the metadata. Once that's done write a dummy * record to dirty the log in case of a crash while frozen. */ STATIC void @@ -1230,7 +1230,7 @@ xfs_fs_lockfs( { struct xfs_mount *mp = XFS_M(sb); - xfs_attr_quiesce(mp); + xfs_quiesce_attr(mp); xfs_fs_log_dummy(mp); } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ed24435af65..b2b708254ae 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -357,6 +357,61 @@ xfs_quiesce_data( return error; } +STATIC void +xfs_quiesce_fs( + struct xfs_mount *mp) +{ + int count = 0, pincount; + + xfs_flush_buftarg(mp->m_ddev_targp, 0); + xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. + */ + do { + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); + pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (!pincount) { + delay(50); + count++; + } + } while (count < 2); +} + +/* + * Second stage of a quiesce. The data is already synced, now we have to take + * care of the metadata. New transactions are already blocked, so we need to + * wait for any remaining transactions to drain out before proceding. + */ +void +xfs_quiesce_attr( + struct xfs_mount *mp) +{ + int error = 0; + + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* flush inodes and push all remaining buffers out to disk */ + xfs_quiesce_fs(mp); + + ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); + + /* Push the superblock and write an unmount record */ + error = xfs_log_sbcount(mp, 1); + if (error) + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_attr_quiesce: failed to log sb changes. " + "Frozen image may not be consistent."); + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + /* * Enqueue a work item to be picked up by the vfs xfssyncd thread. * Doing this has two advantages: diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 4591dc0c788..3b49aa3bb5f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -40,6 +40,7 @@ int xfs_sync_inodes(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); +void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b55a9bb3a6e..883dd0f68e9 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -59,61 +59,6 @@ #include "xfs_sync.h" -STATIC void -xfs_quiesce_fs( - xfs_mount_t *mp) -{ - int count = 0, pincount; - - xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); - - /* - * This loop must run at least twice. The first instance of the loop - * will flush most meta data but that will generate more meta data - * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. - */ - do { - xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); - pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); - if (!pincount) { - delay(50); - count++; - } - } while (count < 2); -} - -/* - * Second stage of a quiesce. The data is already synced, now we have to take - * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceding. - */ -void -xfs_attr_quiesce( - xfs_mount_t *mp) -{ - int error = 0; - - /* wait for all modifications to complete */ - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); - - /* flush inodes and push all remaining buffers out to disk */ - xfs_quiesce_fs(mp); - - ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); - - /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp, 1); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_attr_quiesce: failed to log sb changes. " - "Frozen image may not be consistent."); - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); -} - /* * xfs_unmount_flush implements a set of flush operation on special * inodes, which are needed as a separate set of operations so that diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index 6701d0ed8ad..6b8e0b52b95 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -10,6 +10,5 @@ struct xfs_mount_args; void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); -void xfs_attr_quiesce(struct xfs_mount *mp); #endif /* _XFS_VFSOPS_H */ -- cgit v1.2.3 From 3471394ba56f44761ce1c300f139478dbfb49d4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:21:10 +1100 Subject: [XFS] fix instant oops with tracing enabled We can only read inode->i_count if the inode is actually there and not a NULL pointer. This was introduced in one of the recent sync patches. SGI-PV: 988255 SGI-Modid: xfs-linux-melb:xfs-kern:32315a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_vnode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index dceb6dbaa2d..ac827d23149 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -92,7 +92,7 @@ static inline int xfs_icount(struct xfs_inode *ip) { struct inode *inode = VFS_I(ip); - if (!inode) + if (inode) return atomic_read(&inode->i_count); return -1; } -- cgit v1.2.3 From 6441e549157b749bae003cce70b4c8b62e4801fa Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:21:19 +1100 Subject: [XFS] factor xfs_iget_core() into hit and miss cases There are really two cases in xfs_iget_core(). The first is the cache hit case, the second is the miss case. They share very little code, and hence can easily be factored out into separate functions. This makes the code much easier to understand and subsequently modify. SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32317a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_iget.c | 348 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 191 insertions(+), 157 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 58865fe4780..b2539b17c95 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -40,161 +40,119 @@ #include "xfs_utils.h" /* - * Look up an inode by number in the given file system. - * The inode is looked up in the cache held in each AG. - * If the inode is found in the cache, attach it to the provided - * vnode. - * - * If it is not in core, read it in from the file system's device, - * add it to the cache and attach the provided vnode. - * - * The inode is locked according to the value of the lock_flags parameter. - * This flag parameter indicates how and if the inode's IO lock and inode lock - * should be taken. - * - * mp -- the mount point structure for the current file system. It points - * to the inode hash table. - * tp -- a pointer to the current transaction if there is one. This is - * simply passed through to the xfs_iread() call. - * ino -- the number of the inode desired. This is the unique identifier - * within the file system for the inode being requested. - * lock_flags -- flags indicating how to lock the inode. See the comment - * for xfs_ilock() for a list of valid values. - * bno -- the block number starting the buffer containing the inode, - * if known (as by bulkstat), else 0. + * Check the validity of the inode we just found it the cache */ -STATIC int -xfs_iget_core( - struct inode *inode, - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - uint flags, - uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) +static int +xfs_iget_cache_hit( + struct inode *inode, + struct xfs_perag *pag, + struct xfs_inode *ip, + int flags, + int lock_flags) __releases(pag->pag_ici_lock) { - struct inode *old_inode; - xfs_inode_t *ip; - int error; - unsigned long first_index, mask; - xfs_perag_t *pag; - xfs_agino_t agino; + struct xfs_mount *mp = ip->i_mount; + struct inode *old_inode; + int error = 0; - /* the radix tree exists only in inode capable AGs */ - if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) - return EINVAL; - - /* get the perag structure and ensure that it's inode capable */ - pag = xfs_get_perag(mp, ino); - if (!pag->pagi_inodeok) - return EINVAL; - ASSERT(pag->pag_ici_init); - agino = XFS_INO_TO_AGINO(mp, ino); - -again: - read_lock(&pag->pag_ici_lock); - ip = radix_tree_lookup(&pag->pag_ici_root, agino); + /* + * If INEW is set this inode is being set up + * Pause and try again. + */ + if (xfs_iflags_test(ip, XFS_INEW)) { + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; + } - if (ip != NULL) { + old_inode = ip->i_vnode; + if (old_inode == NULL) { /* - * If INEW is set this inode is being set up + * If IRECLAIM is set this inode is + * on its way out of the system, * we need to pause and try again. */ - if (xfs_iflags_test(ip, XFS_INEW)) { - read_unlock(&pag->pag_ici_lock); - delay(1); + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); + goto out_error; + } + ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - goto again; + /* + * If lookup is racing with unlink, then we + * should return an error immediately so we + * don't remove it from the reclaim list and + * potentially leak the inode. + */ + if ((ip->i_d.di_mode == 0) && + !(flags & XFS_IGET_CREATE)) { + error = ENOENT; + goto out_error; } + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - old_inode = ip->i_vnode; - if (old_inode == NULL) { - /* - * If IRECLAIM is set this inode is - * on its way out of the system, - * we need to pause and try again. - */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); - delay(1); - XFS_STATS_INC(xs_ig_frecycle); - - goto again; - } - ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - - /* - * If lookup is racing with unlink, then we - * should return an error immediately so we - * don't remove it from the reclaim list and - * potentially leak the inode. - */ - if ((ip->i_d.di_mode == 0) && - !(flags & XFS_IGET_CREATE)) { - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); - return ENOENT; - } - - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - - XFS_STATS_INC(xs_ig_found); - xfs_iflags_clear(ip, XFS_IRECLAIMABLE); - read_unlock(&pag->pag_ici_lock); - - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); - XFS_MOUNT_IUNLOCK(mp); - - goto finish_inode; - - } else if (inode != old_inode) { - /* The inode is being torn down, pause and - * try again. - */ - if (old_inode->i_state & (I_FREEING | I_CLEAR)) { - read_unlock(&pag->pag_ici_lock); - delay(1); - XFS_STATS_INC(xs_ig_frecycle); - - goto again; - } + xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + read_unlock(&pag->pag_ici_lock); + + XFS_MOUNT_ILOCK(mp); + list_del_init(&ip->i_reclaim); + XFS_MOUNT_IUNLOCK(mp); + + } else if (inode != old_inode) { + /* The inode is being torn down, pause and + * try again. + */ + if (old_inode->i_state & (I_FREEING | I_CLEAR)) { + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; + } /* Chances are the other vnode (the one in the inode) is being torn * down right now, and we landed on top of it. Question is, what do * we do? Unhook the old inode and hook up the new one? */ - cmn_err(CE_PANIC, - "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - old_inode, inode); - } - - /* - * Inode cache hit - */ + cmn_err(CE_PANIC, + "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", + old_inode, inode); + } else { read_unlock(&pag->pag_ici_lock); - XFS_STATS_INC(xs_ig_found); + } -finish_inode: - if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { - xfs_put_perag(mp, pag); - return ENOENT; - } + if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; + goto out; + } - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); + if (lock_flags != 0) + xfs_ilock(ip, lock_flags); - xfs_iflags_clear(ip, XFS_ISTALE); - xfs_itrace_exit_tag(ip, "xfs_iget.found"); - goto return_ip; - } + xfs_iflags_clear(ip, XFS_ISTALE); + xfs_itrace_exit_tag(ip, "xfs_iget.found"); + XFS_STATS_INC(xs_ig_found); + return 0; - /* - * Inode cache miss - */ +out_error: read_unlock(&pag->pag_ici_lock); - XFS_STATS_INC(xs_ig_missed); +out: + return error; +} + + +static int +xfs_iget_cache_miss( + struct xfs_mount *mp, + struct xfs_perag *pag, + xfs_trans_t *tp, + xfs_ino_t ino, + struct xfs_inode **ipp, + xfs_daddr_t bno, + int flags, + int lock_flags) __releases(pag->pag_ici_lock) +{ + struct xfs_inode *ip; + int error; + unsigned long first_index, mask; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); /* * Read the disk inode attributes into a new inode structure and get @@ -202,17 +160,14 @@ finish_inode: */ error = xfs_iread(mp, tp, ino, &ip, bno, (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); - if (error) { - xfs_put_perag(mp, pag); + if (error) return error; - } xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { - xfs_idestroy(ip); - xfs_put_perag(mp, pag); - return ENOENT; + error = ENOENT; + goto out_destroy; } /* @@ -220,9 +175,8 @@ finish_inode: * write spinlock. */ if (radix_tree_preload(GFP_KERNEL)) { - xfs_idestroy(ip); - delay(1); - goto again; + error = EAGAIN; + goto out_destroy; } if (lock_flags) @@ -231,32 +185,104 @@ finish_inode: mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); - /* - * insert the new inode - */ + + /* insert the new inode */ error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { - BUG_ON(error != -EEXIST); - write_unlock(&pag->pag_ici_lock); - radix_tree_preload_end(); - if (lock_flags) - xfs_iunlock(ip, lock_flags); - xfs_idestroy(ip); + WARN_ON(error != -EEXIST); XFS_STATS_INC(xs_ig_dup); - goto again; + error = EAGAIN; + goto out_unlock; } - /* - * These values _must_ be set before releasing the radix tree lock! - */ + /* These values _must_ be set before releasing the radix tree lock! */ ip->i_udquot = ip->i_gdquot = NULL; xfs_iflags_set(ip, XFS_INEW); write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); + *ipp = ip; + return 0; + +out_unlock: + write_unlock(&pag->pag_ici_lock); + radix_tree_preload_end(); +out_destroy: + xfs_idestroy(ip); + return error; +} + +/* + * Look up an inode by number in the given file system. + * The inode is looked up in the cache held in each AG. + * If the inode is found in the cache, attach it to the provided + * vnode. + * + * If it is not in core, read it in from the file system's device, + * add it to the cache and attach the provided vnode. + * + * The inode is locked according to the value of the lock_flags parameter. + * This flag parameter indicates how and if the inode's IO lock and inode lock + * should be taken. + * + * mp -- the mount point structure for the current file system. It points + * to the inode hash table. + * tp -- a pointer to the current transaction if there is one. This is + * simply passed through to the xfs_iread() call. + * ino -- the number of the inode desired. This is the unique identifier + * within the file system for the inode being requested. + * lock_flags -- flags indicating how to lock the inode. See the comment + * for xfs_ilock() for a list of valid values. + * bno -- the block number starting the buffer containing the inode, + * if known (as by bulkstat), else 0. + */ +STATIC int +xfs_iget_core( + struct inode *inode, + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + uint flags, + uint lock_flags, + xfs_inode_t **ipp, + xfs_daddr_t bno) +{ + xfs_inode_t *ip; + int error; + xfs_perag_t *pag; + xfs_agino_t agino; + + /* the radix tree exists only in inode capable AGs */ + if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) + return EINVAL; + + /* get the perag structure and ensure that it's inode capable */ + pag = xfs_get_perag(mp, ino); + if (!pag->pagi_inodeok) + return EINVAL; + ASSERT(pag->pag_ici_init); + agino = XFS_INO_TO_AGINO(mp, ino); + +again: + error = 0; + read_lock(&pag->pag_ici_lock); + ip = radix_tree_lookup(&pag->pag_ici_root, agino); + + if (ip) { + error = xfs_iget_cache_hit(inode, pag, ip, flags, lock_flags); + if (error) + goto out_error_or_again; + } else { + read_unlock(&pag->pag_ici_lock); + XFS_STATS_INC(xs_ig_missed); + + error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, + flags, lock_flags); + if (error) + goto out_error_or_again; + } xfs_put_perag(mp, pag); - return_ip: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); @@ -276,6 +302,14 @@ finish_inode: if (ip->i_d.di_mode != 0) xfs_setup_inode(ip); return 0; + +out_error_or_again: + if (error == EAGAIN) { + delay(1); + goto again; + } + xfs_put_perag(mp, pag); + return error; } -- cgit v1.2.3 From 94b97e39b0c983e86f0028c456dcf213abc722a0 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:21:30 +1100 Subject: [XFS] Never call mark_inode_dirty_sync() directly Once the Linux inode and the XFS inode are combined, we cannot rely on just check if the linux inode exists as a method of determining if it is valid or not. Hence we should always call xfs_mark_inode_dirty_sync() instead as it does the correct checks to determine if the liinux inode is in a valid state or not. SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32318a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_aops.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a44d68eb50b..8fbc97df360 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -191,7 +191,7 @@ xfs_setfilesize( ip->i_d.di_size = isize; ip->i_update_core = 1; ip->i_update_size = 1; - mark_inode_dirty_sync(ioend->io_inode); + xfs_mark_inode_dirty_sync(ip); } xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 095d271f343..3bfb3c0f8e2 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -128,7 +128,7 @@ xfs_ichgtime( if (sync_it) { SYNCHRONIZE(); ip->i_update_core = 1; - mark_inode_dirty_sync(inode); + xfs_mark_inode_dirty_sync(ip); } } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 15fb262ef86..b87e45577a5 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -948,7 +948,7 @@ xfs_fs_write_inode( * it dirty again so we'll try again later. */ if (error) - mark_inode_dirty_sync(inode); + xfs_mark_inode_dirty_sync(XFS_I(inode)); return -error; } -- cgit v1.2.3 From 2cb1599f9b2ecdd7a9e59feeee647eb258966839 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:32:23 +1100 Subject: Inode: Allow external initialisers To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. Factor and export the struct inode initialisation code from alloc_inode() to inode_init_always() as a counterpart to inode_init_once(). i.e. we have to call this init function for each inode instantiation (always), as opposed inode_init_once() which is only called on slab object instantiation (once). Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/inode.c | 140 ++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 78 insertions(+), 62 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 0487ddba139..e7ee99907d6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } -static struct inode *alloc_inode(struct super_block *sb) +/** + * inode_init_always - perform inode structure intialisation + * @sb - superblock inode belongs to. + * @inode - inode to initialise + * + * These are initializations that need to be done on every inode + * allocation as the fields are not initialised by slab allocation. + */ +struct inode *inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct inode *inode; - - if (sb->s_op->alloc_inode) - inode = sb->s_op->alloc_inode(sb); - else - inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL); - if (inode) { - struct address_space * const mapping = &inode->i_data; - - inode->i_sb = sb; - inode->i_blkbits = sb->s_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_op = &empty_iops; - inode->i_fop = &empty_fops; - inode->i_nlink = 1; - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; + struct address_space * const mapping = &inode->i_data; + + inode->i_sb = sb; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); + inode->i_op = &empty_iops; + inode->i_fop = &empty_fops; + inode->i_nlink = 1; + atomic_set(&inode->i_writecount, 0); + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_bytes = 0; + inode->i_generation = 0; #ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); + memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); #endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; - inode->dirtied_when = 0; - if (security_inode_alloc(inode)) { - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, (inode)); - return NULL; - } + inode->i_pipe = NULL; + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_rdev = 0; + inode->dirtied_when = 0; + if (security_inode_alloc(inode)) { + if (inode->i_sb->s_op->destroy_inode) + inode->i_sb->s_op->destroy_inode(inode); + else + kmem_cache_free(inode_cachep, (inode)); + return NULL; + } - spin_lock_init(&inode->i_lock); - lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); + spin_lock_init(&inode->i_lock); + lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - init_rwsem(&inode->i_alloc_sem); - lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); + init_rwsem(&inode->i_alloc_sem); + lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); - mapping->a_ops = &empty_aops; - mapping->host = inode; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = &default_backing_dev_info; - mapping->writeback_index = 0; + mapping->a_ops = &empty_aops; + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; + mapping->writeback_index = 0; - /* - * If the block_device provides a backing_dev_info for client - * inodes then use that. Otherwise the inode share the bdev's - * backing_dev_info. - */ - if (sb->s_bdev) { - struct backing_dev_info *bdi; + /* + * If the block_device provides a backing_dev_info for client + * inodes then use that. Otherwise the inode share the bdev's + * backing_dev_info. + */ + if (sb->s_bdev) { + struct backing_dev_info *bdi; - bdi = sb->s_bdev->bd_inode_backing_dev_info; - if (!bdi) - bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; - mapping->backing_dev_info = bdi; - } - inode->i_private = NULL; - inode->i_mapping = mapping; + bdi = sb->s_bdev->bd_inode_backing_dev_info; + if (!bdi) + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + mapping->backing_dev_info = bdi; } + inode->i_private = NULL; + inode->i_mapping = mapping; + return inode; } +EXPORT_SYMBOL(inode_init_always); + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct inode *inode; + + if (sb->s_op->alloc_inode) + inode = sb->s_op->alloc_inode(sb); + else + inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); + + if (inode) + return inode_init_always(sb, inode); + return NULL; +} void destroy_inode(struct inode *inode) { -- cgit v1.2.3 From 8290c35f87304a6b73d4fd17b03580b4f7425de8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:35:24 +1100 Subject: Inode: Allow external list initialisation To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. After inode allocation and initialisation, we need to add the inode to the superblock list, the in-use list, hash it and do some accounting. This all needs to be done with the inode_lock held and there are already several places in fs/inode.c that do this list manipulation. Factor out the common code, add a locking wrapper and export the function so ti can be called from XFS. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/inode.c | 67 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index e7ee99907d6..fbcf6c5e760 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -550,6 +550,49 @@ repeat: return node ? inode : NULL; } +static unsigned long hash(struct super_block *sb, unsigned long hashval) +{ + unsigned long tmp; + + tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / + L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); + return tmp & I_HASHMASK; +} + +static inline void +__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, + struct inode *inode) +{ + inodes_stat.nr_inodes++; + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); + if (head) + hlist_add_head(&inode->i_hash, head); +} + +/** + * inode_add_to_lists - add a new inode to relevant lists + * @sb - superblock inode belongs to. + * @inode - inode to mark in use + * + * When an inode is allocated it needs to be accounted for, added to the in use + * list, the owning superblock and the inode hash. This needs to be done under + * the inode_lock, so export a function to do this rather than the inode lock + * itself. We calculate the hash list to add to here so it is all internal + * which requires the caller to have already set up the inode number in the + * inode to add. + */ +void inode_add_to_lists(struct super_block *sb, struct inode *inode) +{ + struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); + + spin_lock(&inode_lock); + __inode_add_to_lists(sb, head, inode); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL_GPL(inode_add_to_lists); + /** * new_inode - obtain an inode * @sb: superblock @@ -577,9 +620,7 @@ struct inode *new_inode(struct super_block *sb) inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); + __inode_add_to_lists(sb, NULL, inode); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -638,10 +679,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h if (set(inode, data)) goto set_failed; - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - hlist_add_head(&inode->i_hash, head); + __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -687,10 +725,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - hlist_add_head(&inode->i_hash, head); + __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -714,16 +749,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he return inode; } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; -} - /** * iunique - get a unique inode number * @sb: superblock -- cgit v1.2.3 From bf904248a2adb3f3be4eb4fb1837ce3bb28cca76 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:36:14 +1100 Subject: [XFS] Combine the XFS and Linux inodes To avoid issues with different lifecycles of XFS and Linux inodes, embedd the linux inode inside the XFS inode. This means that the linux inode has the same lifecycle as the XFS inode, even when it has been released by the OS. XFS inodes don't live much longer than this (a short stint in reclaim at most), so there isn't significant memory usage penalties here. Version 3 o kill xfs_icount() Version 2 o remove unused commented out code from xfs_iget(). o kill useless cast in VFS_I() SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32323a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_iops.c | 17 +++-- fs/xfs/linux-2.6/xfs_super.c | 47 +++++------- fs/xfs/linux-2.6/xfs_vnode.c | 15 +--- fs/xfs/xfs_iget.c | 167 +++++++++---------------------------------- fs/xfs/xfs_inode.c | 43 ++++++++--- fs/xfs/xfs_inode.h | 9 ++- fs/xfs/xfs_vnodeops.c | 13 +--- 7 files changed, 108 insertions(+), 203 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 3bfb3c0f8e2..37bb1012aff 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -64,14 +64,14 @@ xfs_synchronize_atime( { struct inode *inode = VFS_I(ip); - if (inode) { + if (!(inode->i_state & I_CLEAR)) { ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; } } /* - * If the linux inode exists, mark it dirty. + * If the linux inode is valid, mark it dirty. * Used when commiting a dirty inode into a transaction so that * the inode will get written back by the linux code */ @@ -81,7 +81,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (inode) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) mark_inode_dirty_sync(inode); } @@ -766,12 +766,21 @@ xfs_diflags_to_iflags( * When reading existing inodes from disk this is called directly * from xfs_iget, when creating a new inode it is called from * xfs_ialloc after setting up the inode. + * + * We are always called with an uninitialised linux inode here. + * We need to initialise the necessary fields and take a reference + * on it. */ void xfs_setup_inode( struct xfs_inode *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = &ip->i_vnode; + + inode->i_ino = ip->i_ino; + inode->i_state = I_NEW|I_LOCK; + inode_add_to_lists(ip->i_mount->m_super, inode); + ASSERT(atomic_read(&inode->i_count) == 1); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b87e45577a5..c6ef684bf2e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -72,7 +72,6 @@ static struct quotactl_ops xfs_quotactl_operations; static struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_vnode_zone; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; @@ -867,29 +866,24 @@ xfsaild_stop( } - +/* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); + BUG(); } +/* + * we need to provide an empty inode free function to prevent + * the generic code from trying to free our combined inode. + */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) -{ - kmem_zone_free(xfs_vnode_zone, inode); -} - -STATIC void -xfs_fs_inode_init_once( - void *vnode) + struct inode *inode) { - inode_init_once((struct inode *)vnode); } - /* * Slab object creation initialisation for the XFS inode. * This covers only the idempotent fields in the XFS inode; @@ -898,13 +892,18 @@ xfs_fs_inode_init_once( * fields in the xfs inode that left in the initialise state * when freeing the inode. */ -void -xfs_inode_init_once( +STATIC void +xfs_fs_inode_init_once( void *inode) { struct xfs_inode *ip = inode; memset(ip, 0, sizeof(struct xfs_inode)); + + /* vfs inode */ + inode_init_once(VFS_I(ip)); + + /* xfs inode */ atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); @@ -975,8 +974,6 @@ xfs_fs_clear_inode( if (xfs_reclaim(ip)) panic("%s: cannot reclaim 0x%p\n", __func__, inode); } - - ASSERT(XFS_I(inode) == NULL); } STATIC void @@ -1829,16 +1826,10 @@ xfs_free_trace_bufs(void) STATIC int __init xfs_init_zones(void) { - xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_vnode_zone) - goto out; xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); if (!xfs_ioend_zone) - goto out_destroy_vnode_zone; + goto out; xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, xfs_ioend_zone); @@ -1854,6 +1845,7 @@ xfs_init_zones(void) "xfs_bmap_free_item"); if (!xfs_bmap_free_item_zone) goto out_destroy_log_ticket_zone; + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), "xfs_btree_cur"); if (!xfs_btree_cur_zone) @@ -1901,8 +1893,8 @@ xfs_init_zones(void) xfs_inode_zone = kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, xfs_inode_init_once); + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, + xfs_fs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; @@ -1950,8 +1942,6 @@ xfs_init_zones(void) mempool_destroy(xfs_ioend_pool); out_destroy_ioend_zone: kmem_zone_destroy(xfs_ioend_zone); - out_destroy_vnode_zone: - kmem_zone_destroy(xfs_vnode_zone); out: return -ENOMEM; } @@ -1976,7 +1966,6 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_log_ticket_zone); mempool_destroy(xfs_ioend_pool); kmem_zone_destroy(xfs_ioend_zone); - kmem_zone_destroy(xfs_vnode_zone); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index ac827d23149..ad18262d651 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -84,25 +84,12 @@ vn_ioerror( #ifdef XFS_INODE_TRACE -/* - * Reference count of Linux inode if present, -1 if the xfs_inode - * has no associated Linux inode. - */ -static inline int xfs_icount(struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - if (inode) - return atomic_read(&inode->i_count); - return -1; -} - #define KTRACE_ENTER(ip, vk, s, line, ra) \ ktrace_enter( (ip)->i_trace, \ /* 0 */ (void *)(__psint_t)(vk), \ /* 1 */ (void *)(s), \ /* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)xfs_icount(ip), \ +/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ /* 4 */ (void *)(ra), \ /* 5 */ NULL, \ /* 6 */ (void *)(__psint_t)current_cpu(), \ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b2539b17c95..c4414e8bce8 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -44,77 +44,65 @@ */ static int xfs_iget_cache_hit( - struct inode *inode, struct xfs_perag *pag, struct xfs_inode *ip, int flags, int lock_flags) __releases(pag->pag_ici_lock) { struct xfs_mount *mp = ip->i_mount; - struct inode *old_inode; int error = 0; /* * If INEW is set this inode is being set up + * If IRECLAIM is set this inode is being torn down * Pause and try again. */ - if (xfs_iflags_test(ip, XFS_INEW)) { + if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; } - old_inode = ip->i_vnode; - if (old_inode == NULL) { + /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ + if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + /* - * If IRECLAIM is set this inode is - * on its way out of the system, - * we need to pause and try again. + * If lookup is racing with unlink, then we should return an + * error immediately so we don't remove it from the reclaim + * list and potentially leak the inode. */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - error = EAGAIN; - XFS_STATS_INC(xs_ig_frecycle); + + if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; goto out_error; } - ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); /* - * If lookup is racing with unlink, then we - * should return an error immediately so we - * don't remove it from the reclaim list and - * potentially leak the inode. + * We need to re-initialise the VFS inode as it has been + * 'freed' by the VFS. Do this here so we can deal with + * errors cleanly, then tag it so it can be set up correctly + * later. */ - if ((ip->i_d.di_mode == 0) && - !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + error = ENOMEM; goto out_error; } - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - + xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); read_unlock(&pag->pag_ici_lock); XFS_MOUNT_ILOCK(mp); list_del_init(&ip->i_reclaim); XFS_MOUNT_IUNLOCK(mp); - - } else if (inode != old_inode) { - /* The inode is being torn down, pause and - * try again. - */ - if (old_inode->i_state & (I_FREEING | I_CLEAR)) { - error = EAGAIN; - XFS_STATS_INC(xs_ig_frecycle); - goto out_error; - } -/* Chances are the other vnode (the one in the inode) is being torn -* down right now, and we landed on top of it. Question is, what do -* we do? Unhook the old inode and hook up the new one? -*/ - cmn_err(CE_PANIC, - "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - old_inode, inode); + } else if (!igrab(VFS_I(ip))) { + /* If the VFS inode is being torn down, pause and try again. */ + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; } else { + /* we've got a live one */ read_unlock(&pag->pag_ici_lock); } @@ -215,11 +203,11 @@ out_destroy: /* * Look up an inode by number in the given file system. * The inode is looked up in the cache held in each AG. - * If the inode is found in the cache, attach it to the provided - * vnode. + * If the inode is found in the cache, initialise the vfs inode + * if necessary. * * If it is not in core, read it in from the file system's device, - * add it to the cache and attach the provided vnode. + * add it to the cache and initialise the vfs inode. * * The inode is locked according to the value of the lock_flags parameter. * This flag parameter indicates how and if the inode's IO lock and inode lock @@ -236,9 +224,8 @@ out_destroy: * bno -- the block number starting the buffer containing the inode, * if known (as by bulkstat), else 0. */ -STATIC int -xfs_iget_core( - struct inode *inode, +int +xfs_iget( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, @@ -269,7 +256,7 @@ again: ip = radix_tree_lookup(&pag->pag_ici_root, agino); if (ip) { - error = xfs_iget_cache_hit(inode, pag, ip, flags, lock_flags); + error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); if (error) goto out_error_or_again; } else { @@ -283,23 +270,16 @@ again: } xfs_put_perag(mp, pag); - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; - /* - * Set up the Linux with the Linux inode. - */ - ip->i_vnode = inode; - inode->i_private = ip; - + ASSERT(ip->i_df.if_ext_max == + XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - if (ip->i_d.di_mode != 0) + if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) xfs_setup_inode(ip); return 0; @@ -313,75 +293,6 @@ out_error_or_again: } -/* - * The 'normal' internal xfs_iget, if needed it will - * 'allocate', or 'get', the vnode. - */ -int -xfs_iget( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - uint flags, - uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) -{ - struct inode *inode; - xfs_inode_t *ip; - int error; - - XFS_STATS_INC(xs_ig_attempts); - -retry: - inode = iget_locked(mp->m_super, ino); - if (!inode) - /* If we got no inode we are out of memory */ - return ENOMEM; - - if (inode->i_state & I_NEW) { - XFS_STATS_INC(vn_active); - XFS_STATS_INC(vn_alloc); - - error = xfs_iget_core(inode, mp, tp, ino, flags, - lock_flags, ipp, bno); - if (error) { - make_bad_inode(inode); - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - iput(inode); - } - return error; - } - - /* - * If the inode is not fully constructed due to - * filehandle mismatches wait for the inode to go - * away and try again. - * - * iget_locked will call __wait_on_freeing_inode - * to wait for the inode to go away. - */ - if (is_bad_inode(inode)) { - iput(inode); - delay(1); - goto retry; - } - - ip = XFS_I(inode); - if (!ip) { - iput(inode); - delay(1); - goto retry; - } - - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); - XFS_STATS_INC(xs_ig_found); - *ipp = ip; - return 0; -} - /* * Look for the inode corresponding to the given ino in the hash table. * If it is there and its i_transp pointer matches tp, return it. @@ -481,14 +392,6 @@ xfs_ireclaim(xfs_inode_t *ip) */ XFS_QM_DQDETACH(ip->i_mount, ip); - /* - * Pull our behavior descriptor from the vnode chain. - */ - if (ip->i_vnode) { - ip->i_vnode->i_private = NULL; - ip->i_vnode = NULL; - } - /* * Free all memory associated with the inode. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc33762abc4..99d9118c4a4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -813,6 +813,16 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(list_empty(&ip->i_reclaim)); + /* + * initialise the VFS inode here to get failures + * out of the way early. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; ip->i_blkno = 0; @@ -1086,6 +1096,7 @@ xfs_ialloc( uint flags; int error; timespec_t tv; + int filestreams = 0; /* * Call the space management code to pick @@ -1093,9 +1104,8 @@ xfs_ialloc( */ error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, ialloc_context, call_again, &ino); - if (error != 0) { + if (error) return error; - } if (*call_again || ino == NULLFSINO) { *ipp = NULL; return 0; @@ -1109,9 +1119,8 @@ xfs_ialloc( */ error = xfs_trans_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); - if (error != 0) { + if (error) return error; - } ASSERT(ip != NULL); ip->i_d.di_mode = (__uint16_t)mode; @@ -1192,13 +1201,12 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: - if (pip && xfs_inode_is_filestream(pip)) { - error = xfs_filestream_associate(pip, ip); - if (error < 0) - return -error; - if (!error) - xfs_iflags_set(ip, XFS_IFILESTREAM); - } + /* + * we can't set up filestreams until after the VFS inode + * is set up properly. + */ + if (pip && xfs_inode_is_filestream(pip)) + filestreams = 1; /* fall through */ case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { @@ -1264,6 +1272,15 @@ xfs_ialloc( /* now that we have an i_mode we can setup inode ops and unlock */ xfs_setup_inode(ip); + /* now we have set up the vfs inode we can associate the filestream */ + if (filestreams) { + error = xfs_filestream_associate(pip, ip); + if (error < 0) + return -error; + if (!error) + xfs_iflags_set(ip, XFS_IFILESTREAM); + } + *ipp = ip; return 0; } @@ -2650,6 +2667,10 @@ xfs_idestroy_fork( * It must free the inode itself and any buffers allocated for * if_extents/if_data and if_broot. It must also free the lock * associated with the inode. + * + * Note: because we don't initialise everything on reallocation out + * of the zone, we must ensure we nullify everything correctly before + * freeing the structure. */ void xfs_idestroy( diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 6fd20fc179a..345b43a90eb 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -236,7 +236,6 @@ typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ - struct inode *i_vnode; /* vnode backpointer */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ @@ -271,6 +270,10 @@ typedef struct xfs_inode { xfs_fsize_t i_size; /* in-memory size */ xfs_fsize_t i_new_size; /* size when write completes */ atomic_t i_iocount; /* outstanding I/O count */ + + /* VFS inode */ + struct inode i_vnode; /* embedded VFS inode */ + /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ @@ -298,13 +301,13 @@ typedef struct xfs_inode { /* Convert from vfs inode to xfs inode */ static inline struct xfs_inode *XFS_I(struct inode *inode) { - return (struct xfs_inode *)inode->i_private; + return container_of(inode, struct xfs_inode, i_vnode); } /* convert from xfs inode to vfs inode */ static inline struct inode *VFS_I(struct xfs_inode *ip) { - return (struct inode *)ip->i_vnode; + return &ip->i_vnode; } /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index a6714579a41..7fb577c9f9d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2833,6 +2833,7 @@ xfs_reclaim( if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); + xfs_iflags_set(ip, XFS_IRECLAIMABLE); return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; @@ -2841,8 +2842,6 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); spin_lock(&ip->i_flags_lock); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - VFS_I(ip)->i_private = NULL; - ip->i_vnode = NULL; spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); @@ -2857,10 +2856,6 @@ xfs_finish_reclaim( int sync_mode) { xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - struct inode *vp = VFS_I(ip); - - if (vp && VN_BAD(vp)) - goto reclaim; /* The hash lock here protects a thread in xfs_iget_core from * racing with us on linking the inode back with a vnode. @@ -2870,7 +2865,7 @@ xfs_finish_reclaim( write_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); if (locked) { @@ -2904,15 +2899,13 @@ xfs_finish_reclaim( * In the case of a forced shutdown we rely on xfs_iflush() to * wait for the inode to be unpinned before returning an error. */ - if (xfs_iflush(ip, sync_mode) == 0) { + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { /* synchronize with xfs_iflush_done */ xfs_iflock(ip); xfs_ifunlock(ip); } xfs_iunlock(ip, XFS_ILOCK_EXCL); - - reclaim: xfs_ireclaim(ip); return 0; } -- cgit v1.2.3 From 99fa8cb3c580d4445fe8fc239454e8f37a3b6847 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:36:40 +1100 Subject: [XFS] Prevent use-after-free caused by synchronous inode reclaim With the combined linux and XFS inode, we need to ensure that the combined structure is not freed before the generic code is finished with the inode. As it turns out, there is a case where the XFS inode is freed before the linux inode - when xfs_reclaim() is called from ->clear_inode() on a clean inode, the xfs inode is freed during that call. The generic code references the inode after the ->clear_inode() call, so this is a use after free situation. Fix the problem by moving the xfs_reclaim() call to ->destroy_inode() instead of in ->clear_inode(). This ensures the combined inode structure is not freed until after the generic code has finished with it. SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32324a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c6ef684bf2e..bb535a7737b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -875,13 +875,18 @@ xfs_fs_alloc_inode( } /* - * we need to provide an empty inode free function to prevent - * the generic code from trying to free our combined inode. + * Now that the generic code is guaranteed not to be accessing + * the linux inode, we can reclaim the inode. */ STATIC void xfs_fs_destroy_inode( struct inode *inode) { + xfs_inode_t *ip = XFS_I(inode); + + XFS_STATS_INC(vn_reclaim); + if (xfs_reclaim(ip)) + panic("%s: cannot reclaim 0x%p\n", __func__, inode); } /* @@ -958,22 +963,13 @@ xfs_fs_clear_inode( { xfs_inode_t *ip = XFS_I(inode); - /* - * ip can be null when xfs_iget_core calls xfs_idestroy if we - * find an inode with di_mode == 0 but without IGET_CREATE set. - */ - if (ip) { - xfs_itrace_entry(ip); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); - XFS_STATS_INC(vn_reclaim); - XFS_STATS_DEC(vn_active); - - xfs_inactive(ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); - if (xfs_reclaim(ip)) - panic("%s: cannot reclaim 0x%p\n", __func__, inode); - } + xfs_itrace_entry(ip); + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_DEC(vn_active); + + xfs_inactive(ip); + xfs_iflags_clear(ip, XFS_IMODIFIED); } STATIC void -- cgit v1.2.3 From 493dca6178cf4a6ae2a16c602d6cb455bb7d31bd Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 17:36:52 +1100 Subject: [XFS] Fix build warning - xfs_fs_alloc_inode() needs a return statement SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32325a Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bb535a7737b..1d67d7f92a4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -872,6 +872,7 @@ xfs_fs_alloc_inode( struct super_block *sb) { BUG(); + return NULL; } /* -- cgit v1.2.3 From fce08f2f3bd0d08feeb4cea70e44aa3471d9bb4c Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:03 +1100 Subject: [XFS] move inode reclaim functions to xfs_sync.c Background inode reclaim is run by the xfssyncd. Move the reclaim worker functions to be close to the sync code as the are very similar in structure and are both run from the same background thread. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32329a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 91 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 3 ++ fs/xfs/xfs_inode.h | 2 - fs/xfs/xfs_vnodeops.c | 90 -------------------------------------------- 4 files changed, 94 insertions(+), 92 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b2b708254ae..79038ea55b0 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -583,3 +583,94 @@ xfs_syncd_stop( kthread_stop(mp->m_sync_task); } +int +xfs_finish_reclaim( + xfs_inode_t *ip, + int locked, + int sync_mode) +{ + xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); + + /* The hash lock here protects a thread in xfs_iget_core from + * racing with us on linking the inode back with a vnode. + * Once we have the XFS_IRECLAIM flag set it will not touch + * us. + */ + write_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + if (locked) { + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(ip->i_mount, pag); + + /* + * If the inode is still dirty, then flush it out. If the inode + * is not in the AIL, then it will be OK to flush it delwri as + * long as xfs_iflush() does not keep any references to the inode. + * We leave that decision up to xfs_iflush() since it has the + * knowledge of whether it's OK to simply do a delwri flush of + * the inode or whether we need to wait until the inode is + * pulled from the AIL. + * We get the flush lock regardless, though, just to make sure + * we don't free it while it is being flushed. + */ + if (!locked) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); + } + + /* + * In the case of a forced shutdown we rely on xfs_iflush() to + * wait for the inode to be unpinned before returning an error. + */ + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { + /* synchronize with xfs_iflush_done */ + xfs_iflock(ip); + xfs_ifunlock(ip); + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ireclaim(ip); + return 0; +} + +int +xfs_finish_reclaim_all( + xfs_mount_t *mp, + int noblock, + int mode) +{ + xfs_inode_t *ip, *n; + +restart: + XFS_MOUNT_ILOCK(mp); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + if (noblock) { + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + continue; + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } + } + XFS_MOUNT_IUNLOCK(mp); + if (xfs_finish_reclaim(ip, noblock, mode)) + delay(1); + goto restart; + } + XFS_MOUNT_IUNLOCK(mp); + return 0; +} + + diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 3b49aa3bb5f..23117a17fde 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -45,4 +45,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); +int xfs_finish_reclaim(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_finish_reclaim_all(struct xfs_mount *mp, int noblock, int mode); + #endif diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 345b43a90eb..64e50ff9ad2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -496,8 +496,6 @@ int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); void xfs_ireclaim(xfs_inode_t *); -int xfs_finish_reclaim(xfs_inode_t *, int, int); -int xfs_finish_reclaim_all(struct xfs_mount *, int, int); /* * xfs_inode.c prototypes. diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7fb577c9f9d..cdcc835bc5a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2849,96 +2849,6 @@ xfs_reclaim( return 0; } -int -xfs_finish_reclaim( - xfs_inode_t *ip, - int locked, - int sync_mode) -{ - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - - /* The hash lock here protects a thread in xfs_iget_core from - * racing with us on linking the inode back with a vnode. - * Once we have the XFS_IRECLAIM flag set it will not touch - * us. - */ - write_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - if (locked) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - return 1; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(ip->i_mount, pag); - - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ - if (!locked) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - } - - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. - */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return 0; -} - -int -xfs_finish_reclaim_all( - xfs_mount_t *mp, - int noblock, - int mode) -{ - xfs_inode_t *ip, *n; - -restart: - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { - if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) - continue; - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } - } - XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, mode)) - delay(1); - goto restart; - } - XFS_MOUNT_IUNLOCK(mp); - return 0; -} - /* * xfs_alloc_file_space() * This routine allocates disk space for the given file. -- cgit v1.2.3 From 1dc3318ae1c1cc11f9fb8279a806de448e2b90e8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:15 +1100 Subject: [XFS] rename inode reclaim functions The function names xfs_finish_reclaim and xfs_finish_reclaim_all are not very descriptive of what they are reclaiming. Rename to xfs_reclaim_inode[s] to match the xfs_sync_inodes() function. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32330a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 10 +++++----- fs/xfs/linux-2.6/xfs_sync.h | 4 ++-- fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_vnodeops.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 79038ea55b0..34413ceaea9 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -364,7 +364,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop @@ -505,7 +505,7 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); @@ -584,7 +584,7 @@ xfs_syncd_stop( } int -xfs_finish_reclaim( +xfs_reclaim_inode( xfs_inode_t *ip, int locked, int sync_mode) @@ -645,7 +645,7 @@ xfs_finish_reclaim( } int -xfs_finish_reclaim_all( +xfs_reclaim_inodes( xfs_mount_t *mp, int noblock, int mode) @@ -665,7 +665,7 @@ restart: } } XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, mode)) + if (xfs_reclaim_inode(ip, noblock, mode)) delay(1); goto restart; } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 23117a17fde..c1bcd500509 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -45,7 +45,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); -int xfs_finish_reclaim(struct xfs_inode *ip, int locked, int sync_mode); -int xfs_finish_reclaim_all(struct xfs_mount *mp, int noblock, int mode); +int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); #endif diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 43e5917465a..3704baefe2e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1235,7 +1235,7 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index cdcc835bc5a..07945634923 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2834,7 +2834,7 @@ xfs_reclaim( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); xfs_iflags_set(ip, XFS_IRECLAIMABLE); - return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); + return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; -- cgit v1.2.3 From 396beb85311689e38634926058d9a3bb0576ca8a Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:26 +1100 Subject: [XFS] mark inodes for reclaim via a tag in the inode radix tree Prepare for removing the deleted inode list by marking inodes for reclaim in the inode radix trees so that we can use the radix trees to find reclaimable inodes. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32331a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 41 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 4 ++++ fs/xfs/xfs_ag.h | 5 +++++ fs/xfs/xfs_iget.c | 3 +++ fs/xfs/xfs_vnodeops.c | 1 + 5 files changed, 54 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 34413ceaea9..9e7f4dccab7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -644,6 +644,47 @@ xfs_reclaim_inode( return 0; } +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); +} + +void +xfs_inode_clear_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + __xfs_inode_clear_reclaim_tag(mp, pag, ip); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + int xfs_reclaim_inodes( xfs_mount_t *mp, diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index c1bcd500509..5f6de1efe1f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -48,4 +48,8 @@ void xfs_flush_device(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, + struct xfs_inode *ip); #endif diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 729ee3eb39a..2bfd8632914 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -204,6 +204,11 @@ typedef struct xfs_perag #endif } xfs_perag_t; +/* + * tags for inode radix tree + */ +#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ + #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index c4414e8bce8..a0387f14c20 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,6 +91,9 @@ xfs_iget_cache_hit( } xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + + /* clear the radix tree reclaim flag as well. */ + __xfs_inode_clear_reclaim_tag(mp, pag, ip); read_unlock(&pag->pag_ici_lock); XFS_MOUNT_ILOCK(mp); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 07945634923..f89a73eb016 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2845,6 +2845,7 @@ xfs_reclaim( spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); + xfs_inode_set_reclaim_tag(ip); } return 0; } -- cgit v1.2.3 From 7a3be02baef7bdec43965103441bde5de4dd8601 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:37 +1100 Subject: [XFS] use the inode radix tree for reclaiming inodes Use the reclaim tag to walk the radix tree and find the inodes under reclaim. This was the only user of the deleted inode list. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32333a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 81 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 9e7f4dccab7..bbb40e27840 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -685,32 +685,93 @@ xfs_inode_clear_reclaim_tag( xfs_put_perag(mp, pag); } -int -xfs_reclaim_inodes( + +STATIC void +xfs_reclaim_inodes_ag( xfs_mount_t *mp, - int noblock, + int ag, + int noblock, int mode) { - xfs_inode_t *ip, *n; + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + int first_index; + int skipped; restart: - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + first_index = 0; + skipped = 0; + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void**)&ip, first_index, 1, + XFS_ICI_RECLAIM_TAG); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* update the index for the next lookup */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + + ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM))); + + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + read_unlock(&pag->pag_ici_lock); continue; + } if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { xfs_iunlock(ip, XFS_ILOCK_EXCL); + read_unlock(&pag->pag_ici_lock); continue; } } - XFS_MOUNT_IUNLOCK(mp); + read_unlock(&pag->pag_ici_lock); + + /* + * hmmm - this is an inode already in reclaim. Do + * we even bother catching it here? + */ if (xfs_reclaim_inode(ip, noblock, mode)) - delay(1); + skipped++; + } while (nr_found); + + if (skipped) { + delay(1); goto restart; } - XFS_MOUNT_IUNLOCK(mp); + return; + +} + +int +xfs_reclaim_inodes( + xfs_mount_t *mp, + int noblock, + int mode) +{ + int i; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_reclaim_inodes_ag(mp, i, noblock, mode); + } return 0; } -- cgit v1.2.3 From 116545130cbc5214523c2f994a11c81ef9eb9186 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:49 +1100 Subject: [XFS] kill deleted inodes list Now that the deleted inodes list is unused, kill it. This also removes the i_reclaim list head from the xfs_inode, shrinking it by two pointers. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32334a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 2 -- fs/xfs/linux-2.6/xfs_sync.c | 6 ++++++ fs/xfs/xfs_iget.c | 8 -------- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode.h | 1 - fs/xfs/xfs_mount.c | 1 - fs/xfs/xfs_mount.h | 5 +---- fs/xfs/xfs_vnodeops.c | 12 +----------- 8 files changed, 10 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1d67d7f92a4..206a949e387 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -913,7 +913,6 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); - INIT_LIST_HEAD(&ip->i_reclaim); init_waitqueue_head(&ip->i_ipin_wait); /* * Because we want to use a counting completion, complete @@ -1546,7 +1545,6 @@ xfs_fs_fill_super( goto out_free_args; spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_ilock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); INIT_LIST_HEAD(&mp->m_sync_list); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index bbb40e27840..22006b5733c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -644,6 +644,11 @@ xfs_reclaim_inode( return 0; } +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) @@ -655,6 +660,7 @@ xfs_inode_set_reclaim_tag( spin_lock(&ip->i_flags_lock); radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index a0387f14c20..800133805ca 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -95,10 +95,6 @@ xfs_iget_cache_hit( /* clear the radix tree reclaim flag as well. */ __xfs_inode_clear_reclaim_tag(mp, pag, ip); read_unlock(&pag->pag_ici_lock); - - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); - XFS_MOUNT_IUNLOCK(mp); } else if (!igrab(VFS_I(ip))) { /* If the VFS inode is being torn down, pause and try again. */ error = EAGAIN; @@ -419,11 +415,7 @@ xfs_iextract( write_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); - /* Deal with the deleted inodes list */ - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); mp->m_ireclaims++; - XFS_MOUNT_IUNLOCK(mp); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 99d9118c4a4..4eb629f0513 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -811,7 +811,7 @@ xfs_inode_alloc( ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(list_empty(&ip->i_reclaim)); + ASSERT(completion_done(&ip->i_flush)); /* * initialise the VFS inode here to get failures @@ -2729,7 +2729,7 @@ xfs_idestroy( ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(list_empty(&ip->i_reclaim)); + ASSERT(completion_done(&ip->i_flush)); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 64e50ff9ad2..a5aeb9cfeae 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -235,7 +235,6 @@ typedef struct dm_attrs_s { typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ - struct list_head i_reclaim; /* reclaim list */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3704baefe2e..177976dfea0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -580,7 +580,6 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; - INIT_LIST_HEAD(&mp->m_del_inodes); /* * Setup for attributes, in case they get created. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4e62802b6ab..67cf0b2bb84 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -248,8 +248,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - struct list_head m_del_inodes; /* inodes to reclaim */ - mutex_t m_ilock; /* inode list mutex */ uint m_ireclaims; /* count of calls to reclaim*/ uint m_readio_log; /* min read size log bytes */ uint m_readio_blocks; /* min read size blocks */ @@ -312,8 +310,7 @@ typedef struct xfs_mount { int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ - __uint8_t m_inode_quiesce;/* call quiesce on new inodes. - field governed by m_ilock */ + __uint8_t m_inode_quiesce;/* call quiesce on new inodes. */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f89a73eb016..1d15a320b9a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2835,18 +2835,8 @@ xfs_reclaim( xfs_iflock(ip); xfs_iflags_set(ip, XFS_IRECLAIMABLE); return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); - } else { - xfs_mount_t *mp = ip->i_mount; - - /* Protect sync and unpin from us */ - XFS_MOUNT_ILOCK(mp); - spin_lock(&ip->i_flags_lock); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - spin_unlock(&ip->i_flags_lock); - list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - XFS_MOUNT_IUNLOCK(mp); - xfs_inode_set_reclaim_tag(ip); } + xfs_inode_set_reclaim_tag(ip); return 0; } -- cgit v1.2.3 From 8c38ab032094ff1d903c79db689607b1ebae13ca Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:00 +1100 Subject: [XFS] Prevent looping in xfs_sync_inodes_ag If the last block of the AG has inodes in it and the AG is an exactly power-of-2 size then the last inode in the AG points to the last block in the AG. If we try to find the next inode in the AG by adding one to the inode number, we increment the inode number past the size of the AG. The result is that the macro XFS_INO_TO_AGINO() will strip the AG portion of the inode number and return an inode number of zero. That is, instead of terminating the lookup loop because we hit the inode number went outside the valid range for the AG, the search index returns to zero and we start traversing the radix tree from the start again. This results in an endless loop in xfs_sync_inodes_ag(). Fix it be detecting if the new search index decreases as a result of incrementing the current inode number. That indicate an overflow and hence that we have finished processing the AG so we can terminate the loop. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32335a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 22006b5733c..ee1648b179f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -59,7 +59,7 @@ xfs_sync_inodes_ag( { xfs_perag_t *pag = &mp->m_perag[ag]; int nr_found; - int first_index = 0; + uint32_t first_index = 0; int error = 0; int last_error = 0; int fflag = XFS_B_ASYNC; @@ -97,8 +97,17 @@ xfs_sync_inodes_ag( break; } - /* update the index for the next lookup */ + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } /* * skip inodes in reclaim. Let xfs_syncsub do that for @@ -702,7 +711,7 @@ xfs_reclaim_inodes_ag( xfs_inode_t *ip = NULL; xfs_perag_t *pag = &mp->m_perag[ag]; int nr_found; - int first_index; + uint32_t first_index; int skipped; restart: @@ -724,8 +733,17 @@ restart: break; } - /* update the index for the next lookup */ + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM))); -- cgit v1.2.3 From a7444053fb3ebd3d905e3c7a7bd5ea80a54b083a Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:12 +1100 Subject: [XFS] Account for allocated blocks when expanding directories When we create a directory, we reserve a number of blocks for the maximum possible expansion of of the directory due to various btree splits, freespace allocation, etc. Unfortunately, each allocation is not reflected in the total number of blocks still available to the transaction, so the maximal reservation is used over and over again. This leads to problems where an allocation group has only enough blocks for *some* of the allocations required for the directory modification. After the first N allocations, the remaining blocks in the allocation group drops below the total reservation, and subsequent allocations fail because the allocator will not allow the allocation to proceed if the AG does not have the enough blocks available for the entire allocation total. This results in an ENOSPC occurring after an allocation has already occurred. This results in aborting the directory operation (leaving the directory in an inconsistent state) and cancelling a dirty transaction, which results in a filesystem shutdown. Avoid the problem by reflecting the number of blocks allocated in any directory expansion in the total number of blocks available to the modification in progress. This prevents a directory modification from being aborted part way through with an ENOSPC. SGI-PV: 988144 SGI-Modid: xfs-linux-melb:xfs-kern:32340a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_da_btree.c | 5 +++++ fs/xfs/xfs_dir2.c | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9e561a9cefc..a11a8390bf6 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) int nmap, error, w, count, c, got, i, mapi; xfs_trans_t *tp; xfs_mount_t *mp; + xfs_drfsbno_t nblks; dp = args->dp; mp = dp->i_mount; w = args->whichfork; tp = args->trans; + nblks = dp->i_d.di_nblocks; + /* * For new directories adjust the file offset and block count. */ @@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) } if (mapp != &map) kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *new_blkno = (xfs_dablk_t)bno; return 0; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 80e0dc51361..1afb12278b8 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -525,11 +525,13 @@ xfs_dir2_grow_inode( xfs_mount_t *mp; int nmap; /* number of bmap entries */ xfs_trans_t *tp; + xfs_drfsbno_t nblks; xfs_dir2_trace_args_s("grow_inode", args, space); dp = args->dp; tp = args->trans; mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ @@ -622,7 +624,11 @@ xfs_dir2_grow_inode( */ if (mapp != &map) kmem_free(mapp); + + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + /* * Update file's size if this is the data space and it grew. */ -- cgit v1.2.3 From 82fa9012458d867936d7bf130e6e14bdebc6873c Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:26 +1100 Subject: [XFS] Allocate the struct xfs_ail Rather than embedding the struct xfs_ail in the struct xfs_mount, allocate it during AIL initialisation. Add a back pointer to the struct xfs_ail so that we can pass around the xfs_ail and still be able to access the xfs_mount if need be. This is th first step involved in isolating the AIL implementation from the surrounding filesystem code. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32346a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 28 +++++++------- fs/xfs/xfs_mount.h | 10 +---- fs/xfs/xfs_trans_ail.c | 87 +++++++++++++++++++++++++------------------- fs/xfs/xfs_trans_priv.h | 17 ++++++--- 4 files changed, 77 insertions(+), 65 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 206a949e387..655508ce2b7 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -814,18 +814,18 @@ xfs_setup_devices( */ void xfsaild_wakeup( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_lsn_t threshold_lsn) { - mp->m_ail.xa_target = threshold_lsn; - wake_up_process(mp->m_ail.xa_task); + ailp->xa_target = threshold_lsn; + wake_up_process(ailp->xa_task); } int xfsaild( void *data) { - xfs_mount_t *mp = (xfs_mount_t *)data; + struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; long tout = 0; @@ -837,11 +837,11 @@ xfsaild( /* swsusp */ try_to_freeze(); - ASSERT(mp->m_log); - if (XFS_FORCED_SHUTDOWN(mp)) + ASSERT(ailp->xa_mount->m_log); + if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) continue; - tout = xfsaild_push(mp, &last_pushed_lsn); + tout = xfsaild_push(ailp, &last_pushed_lsn); } return 0; @@ -849,20 +849,20 @@ xfsaild( int xfsaild_start( - xfs_mount_t *mp) + struct xfs_ail *ailp) { - mp->m_ail.xa_target = 0; - mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); - if (IS_ERR(mp->m_ail.xa_task)) - return -PTR_ERR(mp->m_ail.xa_task); + ailp->xa_target = 0; + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); + if (IS_ERR(ailp->xa_task)) + return -PTR_ERR(ailp->xa_task); return 0; } void xfsaild_stop( - xfs_mount_t *mp) + struct xfs_ail *ailp) { - kthread_stop(mp->m_ail.xa_task); + kthread_stop(ailp->xa_task); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 67cf0b2bb84..287f90011ed 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -63,6 +63,7 @@ struct xfs_extdelta; struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; +struct xfs_ail; /* * Prototypes and functions for the Data Migration subsystem. @@ -224,18 +225,11 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) #endif -typedef struct xfs_ail { - struct list_head xa_ail; - uint xa_gen; - struct task_struct *xa_task; - xfs_lsn_t xa_target; -} xfs_ail_t; - typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ spinlock_t m_ail_lock; /* fs AIL mutex */ - xfs_ail_t m_ail; /* fs active log item list */ + struct xfs_ail *m_ail; /* fs active log item list */ xfs_sb_t m_sb; /* copy of fs superblock */ spinlock_t m_sb_lock; /* sb counter lock */ struct xfs_buf *m_sb_bp; /* buffer for superblock */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1f77c00af56..db72b52cd42 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -28,13 +28,13 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); -STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); +STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); +STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); +STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); +STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); #ifdef DEBUG -STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); +STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); #else #define xfs_ail_check(a,l) #endif /* DEBUG */ @@ -57,7 +57,7 @@ xfs_trans_tail_ail( xfs_log_item_t *lip; spin_lock(&mp->m_ail_lock); - lip = xfs_ail_min(&mp->m_ail); + lip = xfs_ail_min(mp->m_ail); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { @@ -91,10 +91,10 @@ xfs_trans_push_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&mp->m_ail); + lip = xfs_ail_min(mp->m_ail); if (lip && !XFS_FORCED_SHUTDOWN(mp)) { - if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) - xfsaild_wakeup(mp, threshold_lsn); + if (XFS_LSN_CMP(threshold_lsn, mp->m_ail->xa_target) > 0) + xfsaild_wakeup(mp->m_ail, threshold_lsn); } } @@ -111,12 +111,12 @@ xfs_trans_first_push_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&mp->m_ail); - *gen = (int)mp->m_ail.xa_gen; + lip = xfs_ail_min(mp->m_ail); + *gen = (int)mp->m_ail->xa_gen; if (lsn == 0) return lip; - list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { + list_for_each_entry(lip, &mp->m_ail->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) return lip; } @@ -129,17 +129,18 @@ xfs_trans_first_push_ail( */ long xfsaild_push( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_lsn_t *last_lsn) { long tout = 1000; /* milliseconds */ xfs_lsn_t last_pushed_lsn = *last_lsn; - xfs_lsn_t target = mp->m_ail.xa_target; + xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; xfs_log_item_t *lip; int gen; int restarts; int flush_log, count, stuck; + xfs_mount_t *mp = ailp->xa_mount; #define XFS_TRANS_PUSH_AIL_RESTARTS 10 @@ -331,7 +332,7 @@ xfs_trans_unlocked_item( * the call to xfs_log_move_tail() doesn't do anything if there's * not enough free space to wake people up so we're safe calling it. */ - min_lip = xfs_ail_min(&mp->m_ail); + min_lip = xfs_ail_min(mp->m_ail); if (min_lip == lip) xfs_log_move_tail(mp, 1); @@ -362,10 +363,10 @@ xfs_trans_update_ail( xfs_log_item_t *dlip=NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ - mlip = xfs_ail_min(&mp->m_ail); + mlip = xfs_ail_min(mp->m_ail); if (lip->li_flags & XFS_LI_IN_AIL) { - dlip = xfs_ail_delete(&mp->m_ail, lip); + dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); } else { lip->li_flags |= XFS_LI_IN_AIL; @@ -373,11 +374,11 @@ xfs_trans_update_ail( lip->li_lsn = lsn; - xfs_ail_insert(&mp->m_ail, lip); - mp->m_ail.xa_gen++; + xfs_ail_insert(mp->m_ail, lip); + mp->m_ail->xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&mp->m_ail); + mlip = xfs_ail_min(mp->m_ail); spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, mlip->li_lsn); } else { @@ -411,17 +412,17 @@ xfs_trans_delete_ail( xfs_log_item_t *mlip; if (lip->li_flags & XFS_LI_IN_AIL) { - mlip = xfs_ail_min(&mp->m_ail); - dlip = xfs_ail_delete(&mp->m_ail, lip); + mlip = xfs_ail_min(mp->m_ail); + dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - mp->m_ail.xa_gen++; + mp->m_ail->xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&mp->m_ail); + mlip = xfs_ail_min(mp->m_ail); spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); } else { @@ -459,8 +460,8 @@ xfs_trans_first_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&mp->m_ail); - *gen = (int)mp->m_ail.xa_gen; + lip = xfs_ail_min(mp->m_ail); + *gen = (int)mp->m_ail->xa_gen; return lip; } @@ -482,11 +483,11 @@ xfs_trans_next_ail( xfs_log_item_t *nlip; ASSERT(mp && lip && gen); - if (mp->m_ail.xa_gen == *gen) { - nlip = xfs_ail_next(&mp->m_ail, lip); + if (mp->m_ail->xa_gen == *gen) { + nlip = xfs_ail_next(mp->m_ail, lip); } else { - nlip = xfs_ail_min(&mp->m_ail); - *gen = (int)mp->m_ail.xa_gen; + nlip = xfs_ail_min(mp->m_ail); + *gen = (int)mp->m_ail->xa_gen; if (restarts != NULL) { XFS_STATS_INC(xs_push_ail_restarts); (*restarts)++; @@ -515,15 +516,25 @@ int xfs_trans_ail_init( xfs_mount_t *mp) { - INIT_LIST_HEAD(&mp->m_ail.xa_ail); - return xfsaild_start(mp); + struct xfs_ail *ailp; + + ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); + if (!ailp) + return ENOMEM; + + ailp->xa_mount = mp; + INIT_LIST_HEAD(&ailp->xa_ail); + return xfsaild_start(ailp); } void xfs_trans_ail_destroy( xfs_mount_t *mp) { - xfsaild_stop(mp); + struct xfs_ail *ailp = mp->m_ail; + + xfsaild_stop(ailp); + kmem_free(ailp); } /* @@ -534,7 +545,7 @@ xfs_trans_ail_destroy( */ STATIC void xfs_ail_insert( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) /* ARGSUSED */ { @@ -568,7 +579,7 @@ xfs_ail_insert( /*ARGSUSED*/ STATIC xfs_log_item_t * xfs_ail_delete( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) /* ARGSUSED */ { @@ -585,7 +596,7 @@ xfs_ail_delete( */ STATIC xfs_log_item_t * xfs_ail_min( - xfs_ail_t *ailp) + struct xfs_ail *ailp) /* ARGSUSED */ { if (list_empty(&ailp->xa_ail)) @@ -601,7 +612,7 @@ xfs_ail_min( */ STATIC xfs_log_item_t * xfs_ail_next( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) /* ARGSUSED */ { @@ -617,7 +628,7 @@ xfs_ail_next( */ STATIC void xfs_ail_check( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) { xfs_log_item_t *prev_lip; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 3c748c456ed..98317fdc33b 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -56,13 +56,20 @@ struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, struct xfs_log_item *, int *, int *); - /* * AIL push thread support */ -long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); -void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); -int xfsaild_start(struct xfs_mount *); -void xfsaild_stop(struct xfs_mount *); +struct xfs_ail { + struct xfs_mount *xa_mount; + struct list_head xa_ail; + uint xa_gen; + struct task_struct *xa_task; + xfs_lsn_t xa_target; +}; + +long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); +void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); +int xfsaild_start(struct xfs_ail *); +void xfsaild_stop(struct xfs_ail *); #endif /* __XFS_TRANS_PRIV_H__ */ -- cgit v1.2.3 From 27d8d5fe0ef9daeaafbdd32b14b32a2211930062 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:39 +1100 Subject: [XFS] Use a cursor for AIL traversal. To replace the current generation number ensuring sanity of the AIL traversal, replace it with an external cursor that is linked to the AIL. Basically, we store the next item in the cursor whenever we want to drop the AIL lock to do something to the current item. When we regain the lock. the current item may already be free, so we can't reference it, but the next item in the traversal is already held in the cursor. When we move or delete an object, we search all the active cursors and if there is an item match we clear the cursor(s) that point to the object. This forces the traversal to restart transparently. We don't invalidate the cursor on insert because the cursor still points to a valid item. If the intem is inserted between the current item and the cursor it does not matter; the traversal is considered to be past the insertion point so it will be picked up in the next traversal. Hence traversal restarts pretty much disappear altogether with this method of traversal, which should substantially reduce the overhead of pushing on a busy AIL. Version 2 o add restart logic o comment cursor interface o minor cleanups SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32347a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 4 +- fs/xfs/xfs_log_recover.c | 61 +++++--------- fs/xfs/xfs_trans_ail.c | 207 ++++++++++++++++++++++++++++++++++------------- fs/xfs/xfs_trans_priv.h | 55 ++++++++++--- 4 files changed, 218 insertions(+), 109 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0b02c644355..4184085d44a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -900,7 +900,7 @@ xfs_log_move_tail(xfs_mount_t *mp, int xfs_log_need_covered(xfs_mount_t *mp) { - int needed = 0, gen; + int needed = 0; xlog_t *log = mp->m_log; if (!xfs_fs_writable(mp)) @@ -909,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp) spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_first_ail(mp, &gen) + && !xfs_trans_first_ail(mp, NULL) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 199c8ea3647..37ba4899f3e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -54,10 +54,8 @@ STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, xlog_recover_item_t *item); #if defined(DEBUG) STATIC void xlog_recover_check_summary(xlog_t *); -STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int); #else #define xlog_recover_check_summary(log) -#define xlog_recover_check_ail(mp, lip, gen) #endif @@ -2710,8 +2708,8 @@ xlog_recover_do_efd_trans( xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; - int gen; __uint64_t efi_id; + struct xfs_ail_cursor cur; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2730,7 +2728,8 @@ xlog_recover_do_efd_trans( */ mp = log->l_mp; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_ail(mp, &gen); + xfs_trans_ail_cursor_init(mp->m_ail, &cur); + lip = xfs_trans_first_ail(mp, &cur); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; @@ -2741,11 +2740,13 @@ xlog_recover_do_efd_trans( */ xfs_trans_delete_ail(mp, lip); xfs_efi_item_free(efip); - return; + spin_lock(&mp->m_ail_lock); + break; } } - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + lip = xfs_trans_next_ail(mp, &cur); } + xfs_trans_ail_cursor_done(mp->m_ail, &cur); spin_unlock(&mp->m_ail_lock); } @@ -3029,33 +3030,6 @@ abort_error: return error; } -/* - * Verify that once we've encountered something other than an EFI - * in the AIL that there are no more EFIs in the AIL. - */ -#if defined(DEBUG) -STATIC void -xlog_recover_check_ail( - xfs_mount_t *mp, - xfs_log_item_t *lip, - int gen) -{ - int orig_gen = gen; - - do { - ASSERT(lip->li_type != XFS_LI_EFI); - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); - /* - * The check will be bogus if we restart from the - * beginning of the AIL, so ASSERT that we don't. - * We never should since we're holding the AIL lock - * the entire time. - */ - ASSERT(gen == orig_gen); - } while (lip != NULL); -} -#endif /* DEBUG */ - /* * When this is called, all of the EFIs which did not have * corresponding EFDs should be in the AIL. What we do now @@ -3080,20 +3054,25 @@ xlog_recover_process_efis( { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; - int gen; xfs_mount_t *mp; int error = 0; + struct xfs_ail_cursor cur; mp = log->l_mp; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_ail(mp, &gen); + xfs_trans_ail_cursor_init(mp->m_ail, &cur); + lip = xfs_trans_first_ail(mp, &cur); while (lip != NULL) { /* * We're done when we see something other than an EFI. + * There should be no EFIs left in the AIL now. */ if (lip->li_type != XFS_LI_EFI) { - xlog_recover_check_ail(mp, lip, gen); +#ifdef DEBUG + for (; lip; lip = xfs_trans_next_ail(mp, &cur)) + ASSERT(lip->li_type != XFS_LI_EFI); +#endif break; } @@ -3102,17 +3081,19 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + lip = xfs_trans_next_ail(mp, &cur); continue; } spin_unlock(&mp->m_ail_lock); error = xlog_recover_process_efi(mp, efip); - if (error) - return error; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + if (error) + goto out; + lip = xfs_trans_next_ail(mp, &cur); } +out: + xfs_trans_ail_cursor_done(mp->m_ail, &cur); spin_unlock(&mp->m_ail_lock); return error; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index db72b52cd42..7b8bfcf1d3d 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -98,6 +98,115 @@ xfs_trans_push_ail( } } +/* + * AIL traversal cursor initialisation. + * + * The cursor keeps track of where our current traversal is up + * to by tracking the next ƣtem in the list for us. However, for + * this to be safe, removing an object from the AIL needs to invalidate + * any cursor that points to it. hence the traversal cursor needs to + * be linked to the struct xfs_ail so that deletion can search all the + * active cursors for invalidation. + * + * We don't link the push cursor because it is embedded in the struct + * xfs_ail and hence easily findable. + */ +void +xfs_trans_ail_cursor_init( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur) +{ + cur->item = NULL; + if (cur == &ailp->xa_cursors) + return; + + cur->next = ailp->xa_cursors.next; + ailp->xa_cursors.next = cur; +} + +/* + * Set the cursor to the next item, because when we look + * up the cursor the current item may have been freed. + */ +STATIC void +xfs_trans_ail_cursor_set( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct xfs_log_item *lip) +{ + if (lip) + cur->item = xfs_ail_next(ailp, lip); +} + +/* + * Get the next item in the traversal and advance the cursor. + * If the cursor was invalidated (inidicated by a lip of 1), + * restart the traversal. + */ +STATIC struct xfs_log_item * +xfs_trans_ail_cursor_next( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur) +{ + struct xfs_log_item *lip = cur->item; + + if ((__psint_t)lip & 1) + lip = xfs_ail_min(ailp); + xfs_trans_ail_cursor_set(ailp, cur, lip); + return lip; +} + +/* + * Invalidate any cursor that is pointing to this item. This is + * called when an item is removed from the AIL. Any cursor pointing + * to this object is now invalid and the traversal needs to be + * terminated so it doesn't reference a freed object. We set the + * cursor item to a value of 1 so we can distinguish between an + * invalidation and the end of the list when getting the next item + * from the cursor. + */ +STATIC void +xfs_trans_ail_cursor_clear( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_ail_cursor *cur; + + /* need to search all cursors */ + for (cur = &ailp->xa_cursors; cur; cur = cur->next) { + if (cur->item == lip) + cur->item = (struct xfs_log_item *) + ((__psint_t)cur->item | 1); + } +} + +/* + * Now that the traversal is complete, we need to remove the cursor + * from the list of traversing cursors. Avoid removing the embedded + * push cursor, but use the fact it is alway present to make the + * list deletion simple. + */ +void +xfs_trans_ail_cursor_done( + struct xfs_ail *ailp, + struct xfs_ail_cursor *done) +{ + struct xfs_ail_cursor *prev = NULL; + struct xfs_ail_cursor *cur; + + done->item = NULL; + if (done == &ailp->xa_cursors) + return; + prev = &ailp->xa_cursors; + for (cur = prev->next; cur; prev = cur, cur = prev->next) { + if (cur == done) { + prev->next = cur->next; + break; + } + } + ASSERT(cur); +} + /* * Return the item in the AIL with the current lsn. * Return the current tree generation number for use @@ -105,20 +214,22 @@ xfs_trans_push_ail( */ STATIC xfs_log_item_t * xfs_trans_first_push_ail( - xfs_mount_t *mp, - int *gen, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn) { - xfs_log_item_t *lip; + xfs_log_item_t *lip; - lip = xfs_ail_min(mp->m_ail); - *gen = (int)mp->m_ail->xa_gen; + lip = xfs_ail_min(ailp); + xfs_trans_ail_cursor_set(ailp, cur, lip); if (lsn == 0) return lip; - list_for_each_entry(lip, &mp->m_ail->xa_ail, li_ail) { - if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) { + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; + } } return NULL; @@ -137,22 +248,21 @@ xfsaild_push( xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; xfs_log_item_t *lip; - int gen; - int restarts; int flush_log, count, stuck; xfs_mount_t *mp = ailp->xa_mount; - -#define XFS_TRANS_PUSH_AIL_RESTARTS 10 + struct xfs_ail_cursor *cur = &ailp->xa_cursors; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn); + xfs_trans_ail_cursor_init(ailp, cur); + lip = xfs_trans_first_push_ail(ailp, cur, *last_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. */ + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&mp->m_ail_lock); last_pushed_lsn = 0; - goto out; + return tout; } XFS_STATS_INC(xs_push_ail); @@ -170,7 +280,7 @@ xfsaild_push( */ tout = 10; lsn = lip->li_lsn; - flush_log = stuck = count = restarts = 0; + flush_log = stuck = count = 0; while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { int lock_result; /* @@ -245,13 +355,12 @@ xfsaild_push( if (stuck > 100) break; - lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); + lip = xfs_trans_ail_cursor_next(ailp, cur); if (lip == NULL) break; - if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS) - break; lsn = lip->li_lsn; } + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&mp->m_ail_lock); if (flush_log) { @@ -275,8 +384,7 @@ xfsaild_push( */ tout += 20; last_pushed_lsn = 0; - } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || - ((stuck * 100) / count > 90)) { + } else if ((stuck * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we * are stuck due to operations in progress. "Stuck" in this @@ -288,7 +396,6 @@ xfsaild_push( */ tout += 10; } -out: *last_lsn = last_pushed_lsn; return tout; } /* xfsaild_push */ @@ -348,9 +455,6 @@ xfs_trans_unlocked_item( * we move in the AIL is the minimum one, update the tail lsn in the * log manager. * - * Increment the AIL's generation count to indicate that the tree - * has changed. - * * This function must be called with the AIL lock held. The lock * is dropped before returning. */ @@ -368,14 +472,13 @@ xfs_trans_update_ail( if (lip->li_flags & XFS_LI_IN_AIL) { dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); + xfs_trans_ail_cursor_clear(mp->m_ail, dlip); } else { lip->li_flags |= XFS_LI_IN_AIL; } lip->li_lsn = lsn; - xfs_ail_insert(mp->m_ail, lip); - mp->m_ail->xa_gen++; if (mlip == dlip) { mlip = xfs_ail_min(mp->m_ail); @@ -415,11 +518,11 @@ xfs_trans_delete_ail( mlip = xfs_ail_min(mp->m_ail); dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); + xfs_trans_ail_cursor_clear(mp->m_ail, dlip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - mp->m_ail->xa_gen++; if (mlip == dlip) { mlip = xfs_ail_min(mp->m_ail); @@ -455,46 +558,29 @@ xfs_trans_delete_ail( */ xfs_log_item_t * xfs_trans_first_ail( - xfs_mount_t *mp, - int *gen) + struct xfs_mount *mp, + struct xfs_ail_cursor *cur) { - xfs_log_item_t *lip; + xfs_log_item_t *lip; + struct xfs_ail *ailp = mp->m_ail; - lip = xfs_ail_min(mp->m_ail); - *gen = (int)mp->m_ail->xa_gen; + lip = xfs_ail_min(ailp); + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; } /* - * If the generation count of the tree has not changed since the - * caller last took something from the AIL, then return the elmt - * in the tree which follows the one given. If the count has changed, - * then return the minimum elmt of the AIL and bump the restarts counter - * if one is given. + * Grab the next item in the AIL from the cursor passed in. */ xfs_log_item_t * xfs_trans_next_ail( - xfs_mount_t *mp, - xfs_log_item_t *lip, - int *gen, - int *restarts) + struct xfs_mount *mp, + struct xfs_ail_cursor *cur) { - xfs_log_item_t *nlip; - - ASSERT(mp && lip && gen); - if (mp->m_ail->xa_gen == *gen) { - nlip = xfs_ail_next(mp->m_ail, lip); - } else { - nlip = xfs_ail_min(mp->m_ail); - *gen = (int)mp->m_ail->xa_gen; - if (restarts != NULL) { - XFS_STATS_INC(xs_push_ail_restarts); - (*restarts)++; - } - } + struct xfs_ail *ailp = mp->m_ail; - return (nlip); + return xfs_trans_ail_cursor_next(ailp, cur); } @@ -517,6 +603,7 @@ xfs_trans_ail_init( xfs_mount_t *mp) { struct xfs_ail *ailp; + int error; ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) @@ -524,7 +611,15 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); - return xfsaild_start(ailp); + error = xfsaild_start(ailp); + if (error) + goto out_free_ailp; + mp->m_ail = ailp; + return 0; + +out_free_ailp: + kmem_free(ailp); + return error; } void diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 98317fdc33b..f114d388570 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -44,20 +44,33 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, xfs_extlen_t idx); /* - * From xfs_trans_ail.c + * AIL traversal cursor. + * + * Rather than using a generation number for detecting changes in the ail, use + * a cursor that is protected by the ail lock. The aild cursor exists in the + * struct xfs_ail, but other traversals can declare it on the stack and link it + * to the ail list. + * + * When an object is deleted from or moved int the AIL, the cursor list is + * searched to see if the object is a designated cursor item. If it is, it is + * deleted from the cursor so that the next time the cursor is used traversal + * will return to the start. + * + * This means a traversal colliding with a removal will cause a restart of the + * list scan, rather than any insertion or deletion anywhere in the list. The + * low bit of the item pointer is set if the cursor has been invalidated so + * that we can tell the difference between invalidation and reaching the end + * of the list to trigger traversal restarts. */ -void xfs_trans_update_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, xfs_lsn_t lsn) - __releases(mp->m_ail_lock); -void xfs_trans_delete_ail(struct xfs_mount *mp, - struct xfs_log_item *lip) - __releases(mp->m_ail_lock); -struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); -struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, - struct xfs_log_item *, int *, int *); +struct xfs_ail_cursor { + struct xfs_ail_cursor *next; + struct xfs_log_item *item; +}; /* - * AIL push thread support + * Private AIL structures. + * + * Eventually we need to drive the locking in here as well. */ struct xfs_ail { struct xfs_mount *xa_mount; @@ -65,8 +78,28 @@ struct xfs_ail { uint xa_gen; struct task_struct *xa_task; xfs_lsn_t xa_target; + struct xfs_ail_cursor xa_cursors; }; +/* + * From xfs_trans_ail.c + */ +void xfs_trans_update_ail(struct xfs_mount *mp, + struct xfs_log_item *lip, xfs_lsn_t lsn) + __releases(mp->m_ail_lock); +void xfs_trans_delete_ail(struct xfs_mount *mp, + struct xfs_log_item *lip) + __releases(mp->m_ail_lock); +struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *mp, + struct xfs_ail_cursor *cur); +struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *mp, + struct xfs_ail_cursor *cur); + +void xfs_trans_ail_cursor_init(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur); +void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur); + long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); int xfsaild_start(struct xfs_ail *); -- cgit v1.2.3 From 5b00f14fbd60d42441f78c0e414a539cbfba5cb9 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:00 +1100 Subject: [XFS] move the AIl traversal over to a consistent interface With the new cursor interface, it makes sense to make all the traversing code use the cursor interface and make the old one go away. This means more of the AIL interfacing is done by passing struct xfs_ail pointers around the place instead of struct xfs_mount pointers. We can replace the use of xfs_trans_first_ail() in xfs_log_need_covered() as it is only checking if the AIL is empty. We can do that with a call to xfs_trans_ail_tail() instead, where a zero LSN returned indicates and empty AIL... SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32348a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 4 +- fs/xfs/xfs_log_recover.c | 15 +++--- fs/xfs/xfs_trans.h | 1 - fs/xfs/xfs_trans_ail.c | 117 +++++++++++++++++------------------------------ fs/xfs/xfs_trans_priv.h | 13 +++--- 5 files changed, 58 insertions(+), 92 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4184085d44a..31fbb2eea09 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -909,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp) spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_first_ail(mp, NULL) + && !xfs_trans_ail_tail(mp->m_ail) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; @@ -946,7 +946,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) xfs_lsn_t tail_lsn; xlog_t *log = mp->m_log; - tail_lsn = xfs_trans_tail_ail(mp); + tail_lsn = xfs_trans_ail_tail(mp->m_ail); spin_lock(&log->l_grant_lock); if (tail_lsn != 0) { log->l_tail_lsn = tail_lsn; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 37ba4899f3e..45ea0d95013 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2728,8 +2728,7 @@ xlog_recover_do_efd_trans( */ mp = log->l_mp; spin_lock(&mp->m_ail_lock); - xfs_trans_ail_cursor_init(mp->m_ail, &cur); - lip = xfs_trans_first_ail(mp, &cur); + lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; @@ -2744,7 +2743,7 @@ xlog_recover_do_efd_trans( break; } } - lip = xfs_trans_next_ail(mp, &cur); + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } xfs_trans_ail_cursor_done(mp->m_ail, &cur); spin_unlock(&mp->m_ail_lock); @@ -3061,8 +3060,7 @@ xlog_recover_process_efis( mp = log->l_mp; spin_lock(&mp->m_ail_lock); - xfs_trans_ail_cursor_init(mp->m_ail, &cur); - lip = xfs_trans_first_ail(mp, &cur); + lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { /* * We're done when we see something other than an EFI. @@ -3070,7 +3068,8 @@ xlog_recover_process_efis( */ if (lip->li_type != XFS_LI_EFI) { #ifdef DEBUG - for (; lip; lip = xfs_trans_next_ail(mp, &cur)) + for (; lip; + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur)) ASSERT(lip->li_type != XFS_LI_EFI); #endif break; @@ -3081,7 +3080,7 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_next_ail(mp, &cur); + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); continue; } @@ -3090,7 +3089,7 @@ xlog_recover_process_efis( spin_lock(&mp->m_ail_lock); if (error) goto out; - lip = xfs_trans_next_ail(mp, &cur); + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } out: xfs_trans_ail_cursor_done(mp->m_ail, &cur); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 1d89d50a5b9..ae2ae3e020d 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -971,7 +971,6 @@ void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); -xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); void xfs_trans_unlocked_item(struct xfs_mount *, xfs_log_item_t *); xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7b8bfcf1d3d..286934d56ec 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -50,20 +50,20 @@ STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); * lsn of the last item in the AIL. */ xfs_lsn_t -xfs_trans_tail_ail( - xfs_mount_t *mp) +xfs_trans_ail_tail( + struct xfs_ail *ailp) { xfs_lsn_t lsn; xfs_log_item_t *lip; - spin_lock(&mp->m_ail_lock); - lip = xfs_ail_min(mp->m_ail); + spin_lock(&ailp->xa_mount->m_ail_lock); + lip = xfs_ail_min(ailp); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { lsn = lip->li_lsn; } - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_mount->m_ail_lock); return lsn; } @@ -111,7 +111,7 @@ xfs_trans_push_ail( * We don't link the push cursor because it is embedded in the struct * xfs_ail and hence easily findable. */ -void +STATIC void xfs_trans_ail_cursor_init( struct xfs_ail *ailp, struct xfs_ail_cursor *cur) @@ -143,7 +143,7 @@ xfs_trans_ail_cursor_set( * If the cursor was invalidated (inidicated by a lip of 1), * restart the traversal. */ -STATIC struct xfs_log_item * +struct xfs_log_item * xfs_trans_ail_cursor_next( struct xfs_ail *ailp, struct xfs_ail_cursor *cur) @@ -156,30 +156,6 @@ xfs_trans_ail_cursor_next( return lip; } -/* - * Invalidate any cursor that is pointing to this item. This is - * called when an item is removed from the AIL. Any cursor pointing - * to this object is now invalid and the traversal needs to be - * terminated so it doesn't reference a freed object. We set the - * cursor item to a value of 1 so we can distinguish between an - * invalidation and the end of the list when getting the next item - * from the cursor. - */ -STATIC void -xfs_trans_ail_cursor_clear( - struct xfs_ail *ailp, - struct xfs_log_item *lip) -{ - struct xfs_ail_cursor *cur; - - /* need to search all cursors */ - for (cur = &ailp->xa_cursors; cur; cur = cur->next) { - if (cur->item == lip) - cur->item = (struct xfs_log_item *) - ((__psint_t)cur->item | 1); - } -} - /* * Now that the traversal is complete, we need to remove the cursor * from the list of traversing cursors. Avoid removing the embedded @@ -207,32 +183,56 @@ xfs_trans_ail_cursor_done( ASSERT(cur); } +/* + * Invalidate any cursor that is pointing to this item. This is + * called when an item is removed from the AIL. Any cursor pointing + * to this object is now invalid and the traversal needs to be + * terminated so it doesn't reference a freed object. We set the + * cursor item to a value of 1 so we can distinguish between an + * invalidation and the end of the list when getting the next item + * from the cursor. + */ +STATIC void +xfs_trans_ail_cursor_clear( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_ail_cursor *cur; + + /* need to search all cursors */ + for (cur = &ailp->xa_cursors; cur; cur = cur->next) { + if (cur->item == lip) + cur->item = (struct xfs_log_item *) + ((__psint_t)cur->item | 1); + } +} + /* * Return the item in the AIL with the current lsn. * Return the current tree generation number for use * in calls to xfs_trans_next_ail(). */ -STATIC xfs_log_item_t * -xfs_trans_first_push_ail( +xfs_log_item_t * +xfs_trans_ail_cursor_first( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn) { xfs_log_item_t *lip; + xfs_trans_ail_cursor_init(ailp, cur); lip = xfs_ail_min(ailp); - xfs_trans_ail_cursor_set(ailp, cur, lip); if (lsn == 0) - return lip; + goto out; list_for_each_entry(lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) { - xfs_trans_ail_cursor_set(ailp, cur, lip); - return lip; - } + if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) + break; } - - return NULL; + lip = NULL; +out: + xfs_trans_ail_cursor_set(ailp, cur, lip); + return lip; } /* @@ -254,7 +254,7 @@ xfsaild_push( spin_lock(&mp->m_ail_lock); xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_trans_first_push_ail(ailp, cur, *last_lsn); + lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. @@ -551,39 +551,6 @@ xfs_trans_delete_ail( -/* - * Return the item in the AIL with the smallest lsn. - * Return the current tree generation number for use - * in calls to xfs_trans_next_ail(). - */ -xfs_log_item_t * -xfs_trans_first_ail( - struct xfs_mount *mp, - struct xfs_ail_cursor *cur) -{ - xfs_log_item_t *lip; - struct xfs_ail *ailp = mp->m_ail; - - lip = xfs_ail_min(ailp); - xfs_trans_ail_cursor_set(ailp, cur, lip); - - return lip; -} - -/* - * Grab the next item in the AIL from the cursor passed in. - */ -xfs_log_item_t * -xfs_trans_next_ail( - struct xfs_mount *mp, - struct xfs_ail_cursor *cur) -{ - struct xfs_ail *ailp = mp->m_ail; - - return xfs_trans_ail_cursor_next(ailp, cur); -} - - /* * The active item list (AIL) is a doubly linked list of log * items sorted by ascending lsn. The base of the list is diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index f114d388570..aa585350252 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -90,14 +90,15 @@ void xfs_trans_update_ail(struct xfs_mount *mp, void xfs_trans_delete_ail(struct xfs_mount *mp, struct xfs_log_item *lip) __releases(mp->m_ail_lock); -struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *mp, - struct xfs_ail_cursor *cur); -struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *mp, - struct xfs_ail_cursor *cur); -void xfs_trans_ail_cursor_init(struct xfs_ail *ailp, +xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); + +struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn); +struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, +void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); -- cgit v1.2.3 From 7b2e2a31f5c23b5f028af8c895137b4c512cc1c8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:12 +1100 Subject: [XFS] Allow 64 bit machines to avoid the AIL lock during flushes When copying lsn's from the log item to the inode or dquot flush lsn, we currently grab the AIL lock. We do this because the LSN is a 64 bit quantity and it needs to be read atomically. The lock is used to guarantee atomicity for 32 bit platforms. Make the LSN copying a small function, and make the function used conditional on BITS_PER_LONG so that 64 bit machines don't need to take the AIL lock in these places. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32349a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/quota/xfs_dquot.c | 6 ++---- fs/xfs/xfs_inode.c | 17 +++++++---------- fs/xfs/xfs_trans_priv.h | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 1e6bf392564..59c1081412e 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1272,10 +1272,8 @@ xfs_qm_dqflush( dqp->dq_flags &= ~(XFS_DQ_DIRTY); mp = dqp->q_mount; - /* lsn is 64 bits */ - spin_lock(&mp->m_ail_lock); - dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, + &dqp->q_logitem.qli_item.li_lsn); /* * Attach an iodone routine so that we can remove this dquot from the diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4eb629f0513..2951ffd8306 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2214,9 +2214,9 @@ xfs_ifree_cluster( iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; - spin_lock(&mp->m_ail_lock); - iip->ili_flush_lsn = iip->ili_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, + &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } @@ -2237,9 +2237,8 @@ xfs_ifree_cluster( iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; - spin_lock(&mp->m_ail_lock); - iip->ili_flush_lsn = iip->ili_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) @@ -3476,10 +3475,8 @@ xfs_iflush_int( iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; - ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - spin_lock(&mp->m_ail_lock); - iip->ili_flush_lsn = iip->ili_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); /* * Attach the function xfs_iflush_done to the inode's diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index aa585350252..708cff72d20 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -106,4 +106,27 @@ void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); int xfsaild_start(struct xfs_ail *); void xfsaild_stop(struct xfs_ail *); +#if BITS_PER_LONG != 64 +static inline void +xfs_trans_ail_copy_lsn( + struct xfs_ail *ailp, + xfs_lsn_t *dst, + xfs_lsn_t *src) +{ + ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ + spin_lock(&ailp->xa_mount->m_ail_lock); + *dst = *src; + spin_unlock(&ailp->xa_mount->m_ail_lock); +} +#else +static inline void +xfs_trans_ail_copy_lsn( + struct xfs_ail *ailp, + xfs_lsn_t *dst, + xfs_lsn_t *src) +{ + ASSERT(sizeof(xfs_lsn_t) == 8); + *dst = *src; +} +#endif #endif /* __XFS_TRANS_PRIV_H__ */ -- cgit v1.2.3 From c7e8f268278a292d3823b4352182fa7755a71410 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:23 +1100 Subject: [XFS] Move the AIL lock into the struct xfs_ail Bring the ail lock inside the struct xfs_ail. This means the AIL can be entirely manipulated via the struct xfs_ail rather than needing both the struct xfs_mount and the struct xfs_ail. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32350a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/quota/xfs_dquot.c | 4 ++-- fs/xfs/quota/xfs_dquot_item.c | 2 +- fs/xfs/xfs_buf_item.c | 4 ++-- fs/xfs/xfs_extfree_item.c | 12 +++++----- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode_item.c | 8 +++---- fs/xfs/xfs_log.c | 1 - fs/xfs/xfs_log_recover.c | 16 ++++++------- fs/xfs/xfs_mount.h | 1 - fs/xfs/xfs_trans.c | 4 ++-- fs/xfs/xfs_trans_ail.c | 56 ++++++++++++++++++++++--------------------- fs/xfs/xfs_trans_priv.h | 5 ++-- 12 files changed, 59 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 59c1081412e..0d7a62bffee 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1333,7 +1333,7 @@ xfs_qm_dqflush_done( if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && qip->qli_item.li_lsn == qip->qli_flush_lsn) { - spin_lock(&dqp->q_mount->m_ail_lock); + spin_lock(&dqp->q_mount->m_ail->xa_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ @@ -1341,7 +1341,7 @@ xfs_qm_dqflush_done( xfs_trans_delete_ail(dqp->q_mount, (xfs_log_item_t*)qip); else - spin_unlock(&dqp->q_mount->m_ail_lock); + spin_unlock(&dqp->q_mount->m_ail->xa_lock); } /* diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 48f08109621..0e1fa517db0 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -555,7 +555,7 @@ xfs_qm_qoffend_logitem_committed( xfs_qoff_logitem_t *qfs; qfs = qfe->qql_start_lip; - spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); + spin_lock(&qfs->qql_item.li_mountp->m_ail->xa_lock); /* * Delete the qoff-start logitem from the AIL. * xfs_trans_delete_ail() drops the AIL lock. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 002fc2617c8..c557fd68252 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -408,7 +408,7 @@ xfs_buf_item_unpin( XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); @@ -1138,7 +1138,7 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. */ - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 8aa28f751b2..f1dcd80cf06 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -111,7 +111,7 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) xfs_mount_t *mp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * xfs_trans_delete_ail() drops the AIL lock. @@ -120,7 +120,7 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } } @@ -138,7 +138,7 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) xfs_log_item_desc_t *lidp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * free the xaction descriptor pointing to this item @@ -153,7 +153,7 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } } @@ -352,7 +352,7 @@ xfs_efi_release(xfs_efi_log_item_t *efip, ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); ASSERT(efip->efi_next_extent >= nextents); efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; @@ -363,7 +363,7 @@ xfs_efi_release(xfs_efi_log_item_t *efip, xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2951ffd8306..6d82c23629e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2715,11 +2715,11 @@ xfs_idestroy( ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (lip->li_flags & XFS_LI_IN_AIL) xfs_trans_delete_ail(mp, lip); else - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } xfs_inode_item_destroy(ip); ip->i_itemp = NULL; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 97c7452e262..291d30aded6 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -991,7 +991,7 @@ xfs_iflush_done( */ if (iip->ili_logged && (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { - spin_lock(&ip->i_mount->m_ail_lock); + spin_lock(&ip->i_mount->m_ail->xa_lock); if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { /* * xfs_trans_delete_ail() drops the AIL lock. @@ -999,7 +999,7 @@ xfs_iflush_done( xfs_trans_delete_ail(ip->i_mount, (xfs_log_item_t*)iip); } else { - spin_unlock(&ip->i_mount->m_ail_lock); + spin_unlock(&ip->i_mount->m_ail->xa_lock); } } @@ -1038,14 +1038,14 @@ xfs_iflush_abort( mp = ip->i_mount; if (iip) { if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { /* * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); } else - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 31fbb2eea09..a2f7422a749 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -567,7 +567,6 @@ xfs_log_mount( /* * Initialize the AIL now we have a log. */ - spin_lock_init(&mp->m_ail_lock); error = xfs_trans_ail_init(mp); if (error) { cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 45ea0d95013..a484febb9ec 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2681,7 +2681,7 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); /* * xfs_trans_update_ail() drops the AIL lock. */ @@ -2727,7 +2727,7 @@ xlog_recover_do_efd_trans( * in the AIL. */ mp = log->l_mp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { @@ -2739,14 +2739,14 @@ xlog_recover_do_efd_trans( */ xfs_trans_delete_ail(mp, lip); xfs_efi_item_free(efip); - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); break; } } lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } /* @@ -3058,7 +3058,7 @@ xlog_recover_process_efis( struct xfs_ail_cursor cur; mp = log->l_mp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { @@ -3084,16 +3084,16 @@ xlog_recover_process_efis( continue; } - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); error = xlog_recover_process_efi(mp, efip); - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (error) goto out; lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } out: xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); return error; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 287f90011ed..d3b75257602 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -228,7 +228,6 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ - spinlock_t m_ail_lock; /* fs AIL mutex */ struct xfs_ail *m_ail; /* fs active log item list */ xfs_sb_t m_sb; /* copy of fs superblock */ spinlock_t m_sb_lock; /* sb counter lock */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 4e1c22a23be..99ba0e2658b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1425,7 +1425,7 @@ xfs_trans_chunk_committed( * the test below. */ mp = lip->li_mountp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { /* * This will set the item's lsn to item_lsn @@ -1436,7 +1436,7 @@ xfs_trans_chunk_committed( */ xfs_trans_update_ail(mp, lip, item_lsn); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 286934d56ec..0cd47a797d3 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2008 Dave Chinner * All Rights Reserved. * * This program is free software; you can redistribute it and/or @@ -56,14 +57,14 @@ xfs_trans_ail_tail( xfs_lsn_t lsn; xfs_log_item_t *lip; - spin_lock(&ailp->xa_mount->m_ail_lock); + spin_lock(&ailp->xa_lock); lip = xfs_ail_min(ailp); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { lsn = lip->li_lsn; } - spin_unlock(&ailp->xa_mount->m_ail_lock); + spin_unlock(&ailp->xa_lock); return lsn; } @@ -252,7 +253,7 @@ xfsaild_push( xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; - spin_lock(&mp->m_ail_lock); + spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { @@ -260,7 +261,7 @@ xfsaild_push( * AIL is empty or our push has reached the end. */ xfs_trans_ail_cursor_done(ailp, cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); last_pushed_lsn = 0; return tout; } @@ -295,7 +296,7 @@ xfsaild_push( * skip to the next item in the list. */ lock_result = IOP_TRYLOCK(lip); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); switch (lock_result) { case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); @@ -332,7 +333,7 @@ xfsaild_push( break; } - spin_lock(&mp->m_ail_lock); + spin_lock(&ailp->xa_lock); /* should we bother continuing? */ if (XFS_FORCED_SHUTDOWN(mp)) break; @@ -361,7 +362,7 @@ xfsaild_push( lsn = lip->li_lsn; } xfs_trans_ail_cursor_done(ailp, cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); if (flush_log) { /* @@ -462,30 +463,31 @@ void xfs_trans_update_ail( xfs_mount_t *mp, xfs_log_item_t *lip, - xfs_lsn_t lsn) __releases(mp->m_ail_lock) + xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_log_item_t *dlip=NULL; + struct xfs_ail *ailp = mp->m_ail; + xfs_log_item_t *dlip = NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ - mlip = xfs_ail_min(mp->m_ail); + mlip = xfs_ail_min(ailp); if (lip->li_flags & XFS_LI_IN_AIL) { - dlip = xfs_ail_delete(mp->m_ail, lip); + dlip = xfs_ail_delete(ailp, lip); ASSERT(dlip == lip); - xfs_trans_ail_cursor_clear(mp->m_ail, dlip); + xfs_trans_ail_cursor_clear(ailp, dlip); } else { lip->li_flags |= XFS_LI_IN_AIL; } lip->li_lsn = lsn; - xfs_ail_insert(mp->m_ail, lip); + xfs_ail_insert(ailp, lip); if (mlip == dlip) { - mlip = xfs_ail_min(mp->m_ail); - spin_unlock(&mp->m_ail_lock); + mlip = xfs_ail_min(ailp); + spin_unlock(&ailp->xa_lock); xfs_log_move_tail(mp, mlip->li_lsn); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); } @@ -509,27 +511,28 @@ xfs_trans_update_ail( void xfs_trans_delete_ail( xfs_mount_t *mp, - xfs_log_item_t *lip) __releases(mp->m_ail_lock) + xfs_log_item_t *lip) __releases(ailp->xa_lock) { + struct xfs_ail *ailp = mp->m_ail; xfs_log_item_t *dlip; xfs_log_item_t *mlip; if (lip->li_flags & XFS_LI_IN_AIL) { - mlip = xfs_ail_min(mp->m_ail); - dlip = xfs_ail_delete(mp->m_ail, lip); + mlip = xfs_ail_min(ailp); + dlip = xfs_ail_delete(ailp, lip); ASSERT(dlip == lip); - xfs_trans_ail_cursor_clear(mp->m_ail, dlip); + xfs_trans_ail_cursor_clear(ailp, dlip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; if (mlip == dlip) { - mlip = xfs_ail_min(mp->m_ail); - spin_unlock(&mp->m_ail_lock); + mlip = xfs_ail_min(ailp); + spin_unlock(&ailp->xa_lock); xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); } } else { @@ -537,13 +540,11 @@ xfs_trans_delete_ail( * If the file system is not being shutdown, we are in * serious trouble if we get to this stage. */ - if (XFS_FORCED_SHUTDOWN(mp)) - spin_unlock(&mp->m_ail_lock); - else { + spin_unlock(&ailp->xa_lock); + if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, "%s: attempting to delete a log item that is not in the AIL", __func__); - spin_unlock(&mp->m_ail_lock); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } } @@ -578,6 +579,7 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); + spin_lock_init(&ailp->xa_lock); error = xfsaild_start(ailp); if (error) goto out_free_ailp; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 708cff72d20..6ca0a7a7e3d 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -79,6 +79,7 @@ struct xfs_ail { struct task_struct *xa_task; xfs_lsn_t xa_target; struct xfs_ail_cursor xa_cursors; + spinlock_t xa_lock; }; /* @@ -114,9 +115,9 @@ xfs_trans_ail_copy_lsn( xfs_lsn_t *src) { ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - spin_lock(&ailp->xa_mount->m_ail_lock); + spin_lock(&ailp->xa_lock); *dst = *src; - spin_unlock(&ailp->xa_mount->m_ail_lock); + spin_unlock(&ailp->xa_lock); } #else static inline void -- cgit v1.2.3 From a9c21c1b9deaced836034e77fe25fe0b55c21f02 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:35 +1100 Subject: [XFS] Given the log a pointer to the AIL When we need to go from the log to the AIL, we have to go via the xfs_mount. Add a xfs_ail pointer to the log so we can go directly to the AIL associated with the log. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32351a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 3 ++- fs/xfs/xfs_log_priv.h | 1 + fs/xfs/xfs_log_recover.c | 42 +++++++++++++++++++++--------------------- 3 files changed, 24 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2f7422a749..405a41ab685 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -572,6 +572,7 @@ xfs_log_mount( cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); goto error; } + mp->m_log->l_ailp = mp->m_ail; /* * skip log recovery on a norecovery mount. pretend it all @@ -908,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp) spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_ail_tail(mp->m_ail) + && !xfs_trans_ail_tail(log->l_ailp) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index e7d8f84443f..de7ef6ca920 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -404,6 +404,7 @@ typedef struct xlog_in_core { typedef struct log { /* The following fields don't need locking */ struct xfs_mount *l_mp; /* mount point */ + struct xfs_ail *l_ailp; /* AIL log is working with */ struct xfs_buf *l_xbuf; /* extra buffer for log * wrapping */ struct xfs_buftarg *l_targ; /* buftarg of log */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a484febb9ec..0bbde7b84fc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2681,7 +2681,7 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&log->l_ailp->xa_lock); /* * xfs_trans_update_ail() drops the AIL lock. */ @@ -2710,6 +2710,7 @@ xlog_recover_do_efd_trans( xfs_log_item_t *lip; __uint64_t efi_id; struct xfs_ail_cursor cur; + struct xfs_ail *ailp; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2727,8 +2728,9 @@ xlog_recover_do_efd_trans( * in the AIL. */ mp = log->l_mp; - spin_lock(&mp->m_ail->xa_lock); - lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); + ailp = log->l_ailp; + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; @@ -2739,14 +2741,14 @@ xlog_recover_do_efd_trans( */ xfs_trans_delete_ail(mp, lip); xfs_efi_item_free(efip); - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); break; } } - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); } - xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* @@ -3053,14 +3055,13 @@ xlog_recover_process_efis( { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; - xfs_mount_t *mp; int error = 0; struct xfs_ail_cursor cur; + struct xfs_ail *ailp; - mp = log->l_mp; - spin_lock(&mp->m_ail->xa_lock); - - lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); + ailp = log->l_ailp; + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { /* * We're done when we see something other than an EFI. @@ -3068,8 +3069,7 @@ xlog_recover_process_efis( */ if (lip->li_type != XFS_LI_EFI) { #ifdef DEBUG - for (; lip; - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur)) + for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) ASSERT(lip->li_type != XFS_LI_EFI); #endif break; @@ -3080,20 +3080,20 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); continue; } - spin_unlock(&mp->m_ail->xa_lock); - error = xlog_recover_process_efi(mp, efip); - spin_lock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); + error = xlog_recover_process_efi(log->l_mp, efip); + spin_lock(&ailp->xa_lock); if (error) goto out; - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); } out: - xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); return error; } -- cgit v1.2.3 From fc1829f34d30899701dfd5890030d39e13e1f47d Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:46 +1100 Subject: [XFS] Add ail pointer into log items Add an xfs_ail pointer to log items so that the log items can reference the AIL directly during callbacks without needed a struct xfs_mount. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32352a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_buf_item.c | 5 ++++- fs/xfs/xfs_extfree_item.c | 28 ++++++++++++++++++---------- fs/xfs/xfs_inode_item.c | 1 + fs/xfs/xfs_trans.c | 9 ++++++--- fs/xfs/xfs_trans.h | 1 + fs/xfs/xfs_trans_item.c | 10 ++++++++++ 6 files changed, 40 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index c557fd68252..793e53c01dc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -731,6 +731,7 @@ xfs_buf_item_init( bip->bli_item.li_type = XFS_LI_BUF; bip->bli_item.li_ops = &xfs_buf_item_ops; bip->bli_item.li_mountp = mp; + bip->bli_item.li_ailp = mp->m_ail; bip->bli_buf = bp; xfs_buf_hold(bp); bip->bli_format.blf_type = XFS_LI_BUF; @@ -1123,11 +1124,13 @@ xfs_buf_iodone( xfs_buf_log_item_t *bip) { struct xfs_mount *mp; + struct xfs_ail *ailp; ASSERT(bip->bli_buf == bp); xfs_buf_rele(bp); mp = bip->bli_item.li_mountp; + ailp = bip->bli_item.li_ailp; /* * If we are forcibly shutting down, this may well be @@ -1138,7 +1141,7 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. */ - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f1dcd80cf06..dab57374e1f 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -108,10 +108,12 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { - xfs_mount_t *mp; + xfs_mount_t *mp; + struct xfs_ail *ailp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail->xa_lock); + ailp = efip->efi_item.li_ailp; + spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * xfs_trans_delete_ail() drops the AIL lock. @@ -120,7 +122,7 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -134,11 +136,13 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) STATIC void xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { - xfs_mount_t *mp; + xfs_mount_t *mp; + struct xfs_ail *ailp; xfs_log_item_desc_t *lidp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail->xa_lock); + ailp = efip->efi_item.li_ailp; + spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * free the xaction descriptor pointing to this item @@ -153,7 +157,7 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -268,6 +272,7 @@ xfs_efi_init(xfs_mount_t *mp, efip->efi_item.li_type = XFS_LI_EFI; efip->efi_item.li_ops = &xfs_efi_item_ops; efip->efi_item.li_mountp = mp; + efip->efi_item.li_ailp = mp->m_ail; efip->efi_format.efi_nextents = nextents; efip->efi_format.efi_id = (__psint_t)(void*)efip; @@ -345,14 +350,16 @@ void xfs_efi_release(xfs_efi_log_item_t *efip, uint nextents) { - xfs_mount_t *mp; - int extents_left; + xfs_mount_t *mp; + struct xfs_ail *ailp; + int extents_left; mp = efip->efi_item.li_mountp; + ailp = efip->efi_item.li_ailp; ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); ASSERT(efip->efi_next_extent >= nextents); efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; @@ -363,7 +370,7 @@ xfs_efi_release(xfs_efi_log_item_t *efip, xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -565,6 +572,7 @@ xfs_efd_init(xfs_mount_t *mp, efdp->efd_item.li_type = XFS_LI_EFD; efdp->efd_item.li_ops = &xfs_efd_item_ops; efdp->efd_item.li_mountp = mp; + efdp->efd_item.li_ailp = mp->m_ail; efdp->efd_efip = efip; efdp->efd_format.efd_nextents = nextents; efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 291d30aded6..47594f4b51d 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -932,6 +932,7 @@ xfs_inode_item_init( iip->ili_item.li_type = XFS_LI_INODE; iip->ili_item.li_ops = &xfs_inode_item_ops; iip->ili_item.li_mountp = mp; + iip->ili_item.li_ailp = mp->m_ail; iip->ili_inode = ip; /* diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 99ba0e2658b..5163e1216c8 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1383,11 +1383,13 @@ xfs_trans_chunk_committed( xfs_log_item_desc_t *lidp; xfs_log_item_t *lip; xfs_lsn_t item_lsn; - struct xfs_mount *mp; int i; lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { + struct xfs_mount *mp; + struct xfs_ail *ailp; + if (xfs_lic_isfree(licp, i)) { continue; } @@ -1425,7 +1427,8 @@ xfs_trans_chunk_committed( * the test below. */ mp = lip->li_mountp; - spin_lock(&mp->m_ail->xa_lock); + ailp = lip->li_ailp; + spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { /* * This will set the item's lsn to item_lsn @@ -1436,7 +1439,7 @@ xfs_trans_chunk_committed( */ xfs_trans_update_ail(mp, lip, item_lsn); } else { - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ae2ae3e020d..0df51547757 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -768,6 +768,7 @@ typedef struct xfs_log_item { xfs_lsn_t li_lsn; /* last on-disk lsn */ struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ struct xfs_mount *li_mountp; /* ptr to fs mount */ + struct xfs_ail *li_ailp; /* ptr to AIL */ uint li_type; /* item type */ uint li_flags; /* misc flags */ struct xfs_log_item *li_bio_list; /* buffer item list */ diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index 3c666e8317f..e110bf57d7f 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -22,6 +22,14 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" +/* XXX: from here down needed until struct xfs_trans has it's own ailp */ +#include "xfs_bit.h" +#include "xfs_buf_item.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, int, int, xfs_lsn_t); @@ -79,6 +87,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) lidp->lid_size = 0; lip->li_desc = lidp; lip->li_mountp = tp->t_mountp; + lip->li_ailp = tp->t_mountp->m_ail; return lidp; } @@ -120,6 +129,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) lidp->lid_size = 0; lip->li_desc = lidp; lip->li_mountp = tp->t_mountp; + lip->li_ailp = tp->t_mountp->m_ail; return lidp; } -- cgit v1.2.3 From 783a2f656f9674c31d4019708a94af93fa1d1c22 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:58 +1100 Subject: [XFS] Finish removing the mount pointer from the AIL API Change all the remaining AIL API functions that are passed struct xfs_mount pointers to pass pointers directly to the struct xfs_ail being used. With this conversion, all external access to the AIL is via the struct xfs_ail. Hence the operation and referencing of the AIL is almost entirely independent of the xfs_mount that is using it - it is now much more tightly tied to the log and the items it is tracking in the log than it is tied to the xfs_mount. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32353a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/quota/xfs_dquot.c | 15 +++++++-------- fs/xfs/quota/xfs_dquot_item.c | 8 +++++--- fs/xfs/xfs_buf_item.c | 24 +++++++++--------------- fs/xfs/xfs_extfree_item.c | 35 ++++++++++------------------------- fs/xfs/xfs_iget.c | 4 +++- fs/xfs/xfs_inode.c | 8 ++++---- fs/xfs/xfs_inode_item.c | 29 ++++++++++++----------------- fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_log_recover.c | 13 +++++-------- fs/xfs/xfs_trans.c | 6 ++---- fs/xfs/xfs_trans.h | 3 --- fs/xfs/xfs_trans_ail.c | 41 +++++++++++++++++++++-------------------- fs/xfs/xfs_trans_buf.c | 7 +++---- fs/xfs/xfs_trans_priv.h | 15 +++++++++------ 14 files changed, 91 insertions(+), 119 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 0d7a62bffee..591ca6602bf 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1319,8 +1319,10 @@ xfs_qm_dqflush_done( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; + struct xfs_ail *ailp; dqp = qip->qli_dquot; + ailp = qip->qli_item.li_ailp; /* * We only want to pull the item from the AIL if its @@ -1333,15 +1335,12 @@ xfs_qm_dqflush_done( if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && qip->qli_item.li_lsn == qip->qli_flush_lsn) { - spin_lock(&dqp->q_mount->m_ail->xa_lock); - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ + /* xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); if (qip->qli_item.li_lsn == qip->qli_flush_lsn) - xfs_trans_delete_ail(dqp->q_mount, - (xfs_log_item_t*)qip); + xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip); else - spin_unlock(&dqp->q_mount->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } /* @@ -1371,7 +1370,7 @@ xfs_dqunlock( mutex_unlock(&(dqp->q_qlock)); if (dqp->q_logitem.qli_dquot == dqp) { /* Once was dqp->q_mount, but might just have been cleared */ - xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp, + xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, (xfs_log_item_t*)&(dqp->q_logitem)); } } diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 0e1fa517db0..1728f6a7c4f 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -553,14 +553,16 @@ xfs_qm_qoffend_logitem_committed( xfs_lsn_t lsn) { xfs_qoff_logitem_t *qfs; + struct xfs_ail *ailp; qfs = qfe->qql_start_lip; - spin_lock(&qfs->qql_item.li_mountp->m_ail->xa_lock); + ailp = qfs->qql_item.li_ailp; + spin_lock(&ailp->xa_lock); /* * Delete the qoff-start logitem from the AIL. - * xfs_trans_delete_ail() drops the AIL lock. + * xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); kmem_free(qfs); kmem_free(qfe); return (xfs_lsn_t)-1; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 793e53c01dc..d245d04e10c 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -375,7 +375,7 @@ xfs_buf_item_unpin( xfs_buf_log_item_t *bip, int stale) { - xfs_mount_t *mp; + struct xfs_ail *ailp; xfs_buf_t *bp; int freed; @@ -387,7 +387,7 @@ xfs_buf_item_unpin( xfs_buftrace("XFS_UNPIN", bp); freed = atomic_dec_and_test(&bip->bli_refcount); - mp = bip->bli_item.li_mountp; + ailp = bip->bli_item.li_ailp; xfs_bunpin(bp); if (freed && stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); @@ -399,17 +399,17 @@ xfs_buf_item_unpin( xfs_buftrace("XFS_UNPIN STALE", bp); /* * If we get called here because of an IO error, we may - * or may not have the item on the AIL. xfs_trans_delete_ail() + * or may not have the item on the AIL. xfs_trans_ail_delete() * will take care of that situation. - * xfs_trans_delete_ail() drops the AIL lock. + * xfs_trans_ail_delete() drops the AIL lock. */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { - spin_lock(&mp->m_ail->xa_lock); - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); + spin_lock(&ailp->xa_lock); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); } @@ -1123,29 +1123,23 @@ xfs_buf_iodone( xfs_buf_t *bp, xfs_buf_log_item_t *bip) { - struct xfs_mount *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = bip->bli_item.li_ailp; ASSERT(bip->bli_buf == bp); xfs_buf_rele(bp); - mp = bip->bli_item.li_mountp; - ailp = bip->bli_item.li_ailp; /* * If we are forcibly shutting down, this may well be * off the AIL already. That's because we simulate the * log-committed callbacks to unpin these buffers. Or we may never * have put this item on AIL because of the transaction was - * aborted forcibly. xfs_trans_delete_ail() takes care of these. + * aborted forcibly. xfs_trans_ail_delete() takes care of these. * * Either way, AIL is useless if we're forcing a shutdown. */ spin_lock(&ailp->xa_lock); - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_free(bip); } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index dab57374e1f..05a4bdd4be3 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -108,17 +108,12 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { - xfs_mount_t *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = efip->efi_item.li_ailp; - mp = efip->efi_item.li_mountp; - ailp = efip->efi_item.li_ailp; spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; @@ -136,12 +131,9 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) STATIC void xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { - xfs_mount_t *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = efip->efi_item.li_ailp; xfs_log_item_desc_t *lidp; - mp = efip->efi_item.li_mountp; - ailp = efip->efi_item.li_ailp; spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* @@ -149,11 +141,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) */ lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip); xfs_trans_free_item(tp, lidp); - /* - * pull the item off the AIL. - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); + + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; @@ -350,12 +340,9 @@ void xfs_efi_release(xfs_efi_log_item_t *efip, uint nextents) { - xfs_mount_t *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = efip->efi_item.li_ailp; int extents_left; - mp = efip->efi_item.li_mountp; - ailp = efip->efi_item.li_ailp; ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); @@ -364,10 +351,8 @@ xfs_efi_release(xfs_efi_log_item_t *efip, efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; if (extents_left == 0) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { spin_unlock(&ailp->xa_lock); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 800133805ca..a1f209b0596 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -38,6 +38,8 @@ #include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_utils.h" +#include "xfs_trans_priv.h" +#include "xfs_inode_item.h" /* * Check the validity of the inode we just found it the cache @@ -616,7 +618,7 @@ xfs_iunlock( * it is in the AIL and anyone is waiting on it. Don't do * this if the caller has asked us not to. */ - xfs_trans_unlocked_item(ip->i_mount, + xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, (xfs_log_item_t*)(ip->i_itemp)); } xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6d82c23629e..c83f6998f95 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2709,17 +2709,17 @@ xfs_idestroy( * inode still in the AIL. If it is there, we should remove * it to prevent a use-after-free from occurring. */ - xfs_mount_t *mp = ip->i_mount; xfs_log_item_t *lip = &ip->i_itemp->ili_item; + struct xfs_ail *ailp = lip->li_ailp; ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_delete_ail(mp, lip); + xfs_trans_ail_delete(ailp, lip); else - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } xfs_inode_item_destroy(ip); ip->i_itemp = NULL; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 47594f4b51d..aa9bf05060c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -977,9 +977,8 @@ xfs_iflush_done( xfs_buf_t *bp, xfs_inode_log_item_t *iip) { - xfs_inode_t *ip; - - ip = iip->ili_inode; + xfs_inode_t *ip = iip->ili_inode; + struct xfs_ail *ailp = iip->ili_item.li_ailp; /* * We only want to pull the item from the AIL if it is @@ -992,15 +991,12 @@ xfs_iflush_done( */ if (iip->ili_logged && (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { - spin_lock(&ip->i_mount->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(ip->i_mount, - (xfs_log_item_t*)iip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); } else { - spin_unlock(&ip->i_mount->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -1032,21 +1028,20 @@ void xfs_iflush_abort( xfs_inode_t *ip) { - xfs_inode_log_item_t *iip; + xfs_inode_log_item_t *iip = ip->i_itemp; xfs_mount_t *mp; iip = ip->i_itemp; mp = ip->i_mount; if (iip) { + struct xfs_ail *ailp = iip->ili_item.li_ailp; if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); } else - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 405a41ab685..51840170b16 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1413,7 +1413,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, */ if (threshold_lsn && !XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_push_ail(mp, threshold_lsn); + xfs_trans_ail_push(log->l_ailp, threshold_lsn); } /* xlog_grant_push_ail */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0bbde7b84fc..cff901efc24 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2683,9 +2683,9 @@ xlog_recover_do_efi_trans( spin_lock(&log->l_ailp->xa_lock); /* - * xfs_trans_update_ail() drops the AIL lock. + * xfs_trans_ail_update() drops the AIL lock. */ - xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); + xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); return 0; } @@ -2704,13 +2704,12 @@ xlog_recover_do_efd_trans( xlog_recover_item_t *item, int pass) { - xfs_mount_t *mp; xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; __uint64_t efi_id; struct xfs_ail_cursor cur; - struct xfs_ail *ailp; + struct xfs_ail *ailp = log->l_ailp; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2727,8 +2726,6 @@ xlog_recover_do_efd_trans( * Search for the efi with the id in the efd format structure * in the AIL. */ - mp = log->l_mp; - ailp = log->l_ailp; spin_lock(&ailp->xa_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { @@ -2736,10 +2733,10 @@ xlog_recover_do_efd_trans( efip = (xfs_efi_log_item_t *)lip; if (efip->efi_format.efi_id == efi_id) { /* - * xfs_trans_delete_ail() drops the + * xfs_trans_ail_delete() drops the * AIL lock. */ - xfs_trans_delete_ail(mp, lip); + xfs_trans_ail_delete(ailp, lip); xfs_efi_item_free(efip); spin_lock(&ailp->xa_lock); break; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 5163e1216c8..ad137efc870 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1387,7 +1387,6 @@ xfs_trans_chunk_committed( lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - struct xfs_mount *mp; struct xfs_ail *ailp; if (xfs_lic_isfree(licp, i)) { @@ -1426,7 +1425,6 @@ xfs_trans_chunk_committed( * This would cause the earlier transaction to fail * the test below. */ - mp = lip->li_mountp; ailp = lip->li_ailp; spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { @@ -1435,9 +1433,9 @@ xfs_trans_chunk_committed( * and update the position of the item in * the AIL. * - * xfs_trans_update_ail() drops the AIL lock. + * xfs_trans_ail_update() drops the AIL lock. */ - xfs_trans_update_ail(mp, lip, item_lsn); + xfs_trans_ail_update(ailp, lip, item_lsn); } else { spin_unlock(&ailp->xa_lock); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0df51547757..d6fe4a88d79 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -971,9 +971,6 @@ int _xfs_trans_commit(xfs_trans_t *, void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); -void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); -void xfs_trans_unlocked_item(struct xfs_mount *, - xfs_log_item_t *); xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0cd47a797d3..67ee4663336 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -86,16 +86,16 @@ xfs_trans_ail_tail( * any of the objects, so the lock is not needed. */ void -xfs_trans_push_ail( - xfs_mount_t *mp, - xfs_lsn_t threshold_lsn) +xfs_trans_ail_push( + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) { - xfs_log_item_t *lip; + xfs_log_item_t *lip; - lip = xfs_ail_min(mp->m_ail); - if (lip && !XFS_FORCED_SHUTDOWN(mp)) { - if (XFS_LSN_CMP(threshold_lsn, mp->m_ail->xa_target) > 0) - xfsaild_wakeup(mp->m_ail, threshold_lsn); + lip = xfs_ail_min(ailp); + if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { + if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) + xfsaild_wakeup(ailp, threshold_lsn); } } @@ -412,7 +412,7 @@ xfsaild_push( */ void xfs_trans_unlocked_item( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_log_item_t *lip) { xfs_log_item_t *min_lip; @@ -424,7 +424,7 @@ xfs_trans_unlocked_item( * over some potentially valid data. */ if (!(lip->li_flags & XFS_LI_IN_AIL) || - XFS_FORCED_SHUTDOWN(mp)) { + XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { return; } @@ -440,10 +440,10 @@ xfs_trans_unlocked_item( * the call to xfs_log_move_tail() doesn't do anything if there's * not enough free space to wake people up so we're safe calling it. */ - min_lip = xfs_ail_min(mp->m_ail); + min_lip = xfs_ail_min(ailp); if (min_lip == lip) - xfs_log_move_tail(mp, 1); + xfs_log_move_tail(ailp->xa_mount, 1); } /* xfs_trans_unlocked_item */ @@ -460,12 +460,11 @@ xfs_trans_unlocked_item( * is dropped before returning. */ void -xfs_trans_update_ail( - xfs_mount_t *mp, +xfs_trans_ail_update( + struct xfs_ail *ailp, xfs_log_item_t *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - struct xfs_ail *ailp = mp->m_ail; xfs_log_item_t *dlip = NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ @@ -485,7 +484,7 @@ xfs_trans_update_ail( if (mlip == dlip) { mlip = xfs_ail_min(ailp); spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(mp, mlip->li_lsn); + xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn); } else { spin_unlock(&ailp->xa_lock); } @@ -509,11 +508,10 @@ xfs_trans_update_ail( * is dropped before returning. */ void -xfs_trans_delete_ail( - xfs_mount_t *mp, +xfs_trans_ail_delete( + struct xfs_ail *ailp, xfs_log_item_t *lip) __releases(ailp->xa_lock) { - struct xfs_ail *ailp = mp->m_ail; xfs_log_item_t *dlip; xfs_log_item_t *mlip; @@ -530,7 +528,8 @@ xfs_trans_delete_ail( if (mlip == dlip) { mlip = xfs_ail_min(ailp); spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); + xfs_log_move_tail(ailp->xa_mount, + (mlip ? mlip->li_lsn : 0)); } else { spin_unlock(&ailp->xa_lock); } @@ -540,6 +539,8 @@ xfs_trans_delete_ail( * If the file system is not being shutdown, we are in * serious trouble if we get to this stage. */ + struct xfs_mount *mp = ailp->xa_mount; + spin_unlock(&ailp->xa_lock); if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 4e855b5ced6..8ee2f8c8b0a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -527,9 +527,8 @@ xfs_trans_brelse(xfs_trans_t *tp, lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); if (lip->li_type == XFS_LI_BUF) { bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - xfs_trans_unlocked_item( - bip->bli_item.li_mountp, - lip); + xfs_trans_unlocked_item(bip->bli_item.li_ailp, + lip); } } xfs_buf_relse(bp); @@ -626,7 +625,7 @@ xfs_trans_brelse(xfs_trans_t *tp, * tell the AIL that the buffer is being unlocked. */ if (bip != NULL) { - xfs_trans_unlocked_item(bip->bli_item.li_mountp, + xfs_trans_unlocked_item(bip->bli_item.li_ailp, (xfs_log_item_t*)bip); } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6ca0a7a7e3d..73e2ad39743 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -85,12 +85,15 @@ struct xfs_ail { /* * From xfs_trans_ail.c */ -void xfs_trans_update_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, xfs_lsn_t lsn) - __releases(mp->m_ail_lock); -void xfs_trans_delete_ail(struct xfs_mount *mp, - struct xfs_log_item *lip) - __releases(mp->m_ail_lock); +void xfs_trans_ail_update(struct xfs_ail *ailp, + struct xfs_log_item *lip, xfs_lsn_t lsn) + __releases(ailp->xa_lock); +void xfs_trans_ail_delete(struct xfs_ail *ailp, + struct xfs_log_item *lip) + __releases(ailp->xa_lock); +void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_trans_unlocked_item(struct xfs_ail *, + xfs_log_item_t *); xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); -- cgit v1.2.3 From 5a792c4579af8466246408e38fd4eff45d8493b8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:40:09 +1100 Subject: [XFS] XFS: Check for valid transaction headers in recovery When we are about to add a new item to a transaction in recovery, we need to check that it is valid first. Currently we just assert that header magic number matches, but in production systems that is not present and we add a corrupted transaction to the list to be processed. This results in a kernel oops later when processing the corrupted transaction. Instead, if we detect a corrupted transaction, abort recovery and leave the user to clean up the mess that has occurred. SGI-PV: 988145 SGI-Modid: xfs-linux-melb:xfs-kern:32356a Signed-off-by: David Chinner Signed-off-by: Tim Shimmin Signed-off-by: Eric Sandeen Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_log_recover.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cff901efc24..b411d494731 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1417,7 +1417,13 @@ xlog_recover_add_to_trans( return 0; item = trans->r_itemq; if (item == NULL) { - ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xlog_warn("XFS: xlog_recover_add_to_trans: " + "bad header magic number"); + ASSERT(0); + return XFS_ERROR(EIO); + } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); /* d, s, l */ -- cgit v1.2.3 From 9d565ffa33d574c2a22442f9d95ca2bd0be7cc42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:53:24 +1100 Subject: [XFS] kill struct xfs_mount_args No need to parse the mount option into a structure before applying it to struct xfs_mount. The content of xfs_start_flags gets merged into xfs_parseargs. Calls inbetween don't care and can use mount members instead of the args struct. This patch uncovered that the mount option for shared filesystems wasn't ever exposed on Linux. The code to handle it is #if 0'ed in this patch pending a decision on this feature. I'll send a writeup about it to the list soon. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32371a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_super.c | 499 ++++++++++++++++--------------------------- fs/xfs/quota/xfs_qm.c | 1 - fs/xfs/quota/xfs_qm_bhv.c | 1 - fs/xfs/xfs_clnt.h | 105 --------- fs/xfs/xfs_dmops.c | 5 +- fs/xfs/xfs_mount.h | 4 +- fs/xfs/xfs_qmops.c | 5 +- fs/xfs/xfs_vfsops.c | 1 - 8 files changed, 195 insertions(+), 426 deletions(-) delete mode 100644 fs/xfs/xfs_clnt.h (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 655508ce2b7..5638a99fb74 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -18,7 +18,6 @@ #include "xfs.h" #include "xfs_bit.h" #include "xfs_log.h" -#include "xfs_clnt.h" #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" @@ -75,32 +74,6 @@ static struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; -STATIC struct xfs_mount_args * -xfs_args_allocate( - struct super_block *sb, - int silent) -{ - struct xfs_mount_args *args; - - args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL); - if (!args) - return NULL; - - args->logbufs = args->logbufsize = -1; - strncpy(args->fsname, sb->s_id, MAXNAMELEN); - - /* Copy the already-parsed mount(2) flags we're interested in */ - if (sb->s_flags & MS_DIRSYNC) - args->flags |= XFSMNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) - args->flags |= XFSMNT_WSYNC; - if (silent) - args->flags |= XFSMNT_QUIET; - args->flags |= XFSMNT_32BITINODES; - - return args; -} - #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ #define MNTOPT_LOGDEV "logdev" /* log device */ @@ -189,26 +162,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base) return simple_strtoul((const char *)s, endp, base) << shift_left_factor; } +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock has _not_ yet been read in. + * + * Note that this function leaks the various device name allocations on + * failure. The caller takes care of them. + */ STATIC int xfs_parseargs( struct xfs_mount *mp, char *options, - struct xfs_mount_args *args, - int update) + char **mtpt) { + struct super_block *sb = mp->m_super; char *this_char, *value, *eov; - int dsunit, dswidth, vol_dsunit, vol_dswidth; - int iosize; + int dsunit = 0; + int dswidth = 0; + int iosize = 0; int dmapi_implies_ikeep = 1; + uchar_t iosizelog = 0; + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (sb->s_flags & MS_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (sb->s_flags & MS_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (sb->s_flags & MS_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + /* + * Set some default flags that could be cleared by the mount option + * parsing. + */ + mp->m_flags |= XFS_MOUNT_BARRIER; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - args->flags |= XFSMNT_BARRIER; - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + /* + * These can be overridden by the mount option parsing. + */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; if (!options) goto done; - iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; - while ((this_char = strsep(&options, ",")) != NULL) { if (!*this_char) continue; @@ -222,7 +223,7 @@ xfs_parseargs( this_char); return EINVAL; } - args->logbufs = simple_strtoul(value, &eov, 10); + mp->m_logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -230,7 +231,7 @@ xfs_parseargs( this_char); return EINVAL; } - args->logbufsize = suffix_strtoul(value, &eov, 10); + mp->m_logbsize = suffix_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -238,7 +239,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->logname, value, MAXNAMELEN); + mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_logname) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -246,7 +249,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->mtpt, value, MAXNAMELEN); + *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!*mtpt) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -254,7 +259,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->rtname, value, MAXNAMELEN); + mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_rtname) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -263,8 +270,7 @@ xfs_parseargs( return EINVAL; } iosize = simple_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = (uint8_t) iosize; + iosizelog = (uint8_t) iosize; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -273,8 +279,7 @@ xfs_parseargs( return EINVAL; } iosize = suffix_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = ffs(iosize) - 1; + iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_GRPID) || !strcmp(this_char, MNTOPT_BSDGROUPS)) { mp->m_flags |= XFS_MOUNT_GRPID; @@ -282,23 +287,25 @@ xfs_parseargs( !strcmp(this_char, MNTOPT_SYSVGROUPS)) { mp->m_flags &= ~XFS_MOUNT_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - args->flags |= XFSMNT_WSYNC; + mp->m_flags |= XFS_MOUNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - args->flags |= XFSMNT_OSYNCISOSYNC; + mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - args->flags |= XFSMNT_NORECOVERY; + mp->m_flags |= XFS_MOUNT_NORECOVERY; } else if (!strcmp(this_char, MNTOPT_INO64)) { - args->flags |= XFSMNT_INO64; -#if !XFS_BIG_INUMS +#if XFS_BIG_INUMS + mp->m_flags |= XFS_MOUNT_INO64; + mp->m_inoadd = XFS_INO64_OFFSET; +#else cmn_err(CE_WARN, "XFS: %s option not allowed on this system", this_char); return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - args->flags |= XFSMNT_NOALIGN; + mp->m_flags |= XFS_MOUNT_NOALIGN; } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - args->flags |= XFSMNT_SWALLOC; + mp->m_flags |= XFS_MOUNT_SWALLOC; } else if (!strcmp(this_char, MNTOPT_SUNIT)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -316,7 +323,7 @@ xfs_parseargs( } dswidth = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - args->flags &= ~XFSMNT_32BITINODES; + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; #if !XFS_BIG_INUMS cmn_err(CE_WARN, "XFS: %s option not allowed on this system", @@ -324,56 +331,60 @@ xfs_parseargs( return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - args->flags |= XFSMNT_NOUUID; + mp->m_flags |= XFS_MOUNT_NOUUID; } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - args->flags |= XFSMNT_BARRIER; + mp->m_flags |= XFS_MOUNT_BARRIER; } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - args->flags &= ~XFSMNT_BARRIER; + mp->m_flags &= ~XFS_MOUNT_BARRIER; } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - args->flags |= XFSMNT_IKEEP; + mp->m_flags |= XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { dmapi_implies_ikeep = 0; - args->flags &= ~XFSMNT_IKEEP; + mp->m_flags &= ~XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; + mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - args->flags |= XFSMNT_ATTR2; + mp->m_flags |= XFS_MOUNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - args->flags &= ~XFSMNT_ATTR2; - args->flags |= XFSMNT_NOATTR2; + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - args->flags2 |= XFSMNT2_FILESTREAMS; + mp->m_flags |= XFS_MOUNT_FILESTREAMS; } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); - args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); + mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || !strcmp(this_char, MNTOPT_USRQUOTA)) { - args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - args->flags |= XFSMNT_UQUOTA; - args->flags &= ~XFSMNT_UQUOTAENF; + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_UQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_PQUOTA) || !strcmp(this_char, MNTOPT_PRJQUOTA)) { - args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - args->flags |= XFSMNT_PQUOTA; - args->flags &= ~XFSMNT_PQUOTAENF; + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_GQUOTA) || !strcmp(this_char, MNTOPT_GRPQUOTA)) { - args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - args->flags |= XFSMNT_GQUOTA; - args->flags &= ~XFSMNT_GQUOTAENF; + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_XDSM)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_DMI)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, "ihashsize")) { cmn_err(CE_WARN, "XFS: ihashsize no longer used, option is deprecated."); @@ -391,27 +402,29 @@ xfs_parseargs( } } - if (args->flags & XFSMNT_NORECOVERY) { - if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { - cmn_err(CE_WARN, - "XFS: no-recovery mounts must be read-only."); - return EINVAL; - } + /* + * no recovery flag requires a read-only mount + */ + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); + return EINVAL; } - if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { cmn_err(CE_WARN, "XFS: sunit and swidth options incompatible with the noalign option"); return EINVAL; } - if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && + (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, "XFS: cannot mount with both project and group quota"); return EINVAL; } - if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { + if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) { printk("XFS: %s option needs the mount point option as well\n", MNTOPT_DMAPI); return EINVAL; @@ -439,27 +452,66 @@ xfs_parseargs( * Note that if "ikeep" or "noikeep" mount options are * supplied, then they are honored. */ - if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep) - args->flags |= XFSMNT_IKEEP; + if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep) + mp->m_flags |= XFS_MOUNT_IKEEP; - if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { +done: + if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { + /* + * At this point the superblock has not been read + * in, therefore we do not know the block size. + * Before the mount call ends we will convert + * these to FSBs. + */ if (dsunit) { - args->sunit = dsunit; - args->flags |= XFSMNT_RETERR; - } else { - args->sunit = vol_dsunit; + mp->m_dalign = dsunit; + mp->m_flags |= XFS_MOUNT_RETERR; } - dswidth ? (args->swidth = dswidth) : - (args->swidth = vol_dswidth); - } else { - args->sunit = args->swidth = 0; + + if (dswidth) + mp->m_swidth = dswidth; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + cmn_err(CE_WARN, + "XFS: invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return XFS_ERROR(EINVAL); + } + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + cmn_err(CE_WARN, + "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return XFS_ERROR(EINVAL); + } + + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + + if (iosizelog) { + if (iosizelog > XFS_MAX_IO_LOG || + iosizelog < XFS_MIN_IO_LOG) { + cmn_err(CE_WARN, + "XFS: invalid log iosize: %d [not %d-%d]", + iosizelog, XFS_MIN_IO_LOG, + XFS_MAX_IO_LOG); + return XFS_ERROR(EINVAL); + } + + mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; + mp->m_readio_log = iosizelog; + mp->m_writeio_log = iosizelog; } -done: - if (args->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - if (args->flags2) - args->flags |= XFSMNT_FLAGS2; return 0; } @@ -705,8 +757,7 @@ xfs_close_devices( */ STATIC int xfs_open_devices( - struct xfs_mount *mp, - struct xfs_mount_args *args) + struct xfs_mount *mp) { struct block_device *ddev = mp->m_super->s_bdev; struct block_device *logdev = NULL, *rtdev = NULL; @@ -715,14 +766,14 @@ xfs_open_devices( /* * Open real time and log devices - order is important. */ - if (args->logname[0]) { - error = xfs_blkdev_get(mp, args->logname, &logdev); + if (mp->m_logname) { + error = xfs_blkdev_get(mp, mp->m_logname, &logdev); if (error) goto out; } - if (args->rtname[0]) { - error = xfs_blkdev_get(mp, args->rtname, &rtdev); + if (mp->m_rtname) { + error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); if (error) goto out_close_logdev; @@ -1286,177 +1337,30 @@ xfs_fs_setxquota( Q_XSETPQLIM), id, (caddr_t)fdq); } -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - */ -STATIC int -xfs_start_flags( - struct xfs_mount_args *ap, - struct xfs_mount *mp) -{ - int error; - - /* Values are in BBs */ - if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - mp->m_dalign = ap->sunit; - mp->m_swidth = ap->swidth; - } - - if (ap->logbufs != -1 && - ap->logbufs != 0 && - (ap->logbufs < XLOG_MIN_ICLOGS || - ap->logbufs > XLOG_MAX_ICLOGS)) { - cmn_err(CE_WARN, - "XFS: invalid logbufs value: %d [not %d-%d]", - ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); - } - mp->m_logbufs = ap->logbufs; - if (ap->logbufsize != -1 && - ap->logbufsize != 0 && - (ap->logbufsize < XLOG_MIN_RECORD_BSIZE || - ap->logbufsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(ap->logbufsize))) { - cmn_err(CE_WARN, - "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - ap->logbufsize); - return XFS_ERROR(EINVAL); - } - - error = ENOMEM; - - mp->m_logbsize = ap->logbufsize; - mp->m_fsname_len = strlen(ap->fsname) + 1; - - mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL); - if (!mp->m_fsname) - goto out; - - if (ap->rtname[0]) { - mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL); - if (!mp->m_rtname) - goto out_free_fsname; - - } - - if (ap->logname[0]) { - mp->m_logname = kstrdup(ap->logname, GFP_KERNEL); - if (!mp->m_logname) - goto out_free_rtname; - } - - if (ap->flags & XFSMNT_WSYNC) - mp->m_flags |= XFS_MOUNT_WSYNC; -#if XFS_BIG_INUMS - if (ap->flags & XFSMNT_INO64) { - mp->m_flags |= XFS_MOUNT_INO64; - mp->m_inoadd = XFS_INO64_OFFSET; - } -#endif - if (ap->flags & XFSMNT_RETERR) - mp->m_flags |= XFS_MOUNT_RETERR; - if (ap->flags & XFSMNT_NOALIGN) - mp->m_flags |= XFS_MOUNT_NOALIGN; - if (ap->flags & XFSMNT_SWALLOC) - mp->m_flags |= XFS_MOUNT_SWALLOC; - if (ap->flags & XFSMNT_OSYNCISOSYNC) - mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; - if (ap->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_32BITINODES; - - if (ap->flags & XFSMNT_IOSIZE) { - if (ap->iosizelog > XFS_MAX_IO_LOG || - ap->iosizelog < XFS_MIN_IO_LOG) { - cmn_err(CE_WARN, - "XFS: invalid log iosize: %d [not %d-%d]", - ap->iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; - } - - if (ap->flags & XFSMNT_IKEEP) - mp->m_flags |= XFS_MOUNT_IKEEP; - if (ap->flags & XFSMNT_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (ap->flags & XFSMNT_ATTR2) - mp->m_flags |= XFS_MOUNT_ATTR2; - if (ap->flags & XFSMNT_NOATTR2) - mp->m_flags |= XFS_MOUNT_NOATTR2; - - if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - - /* - * no recovery flag requires a read-only mount - */ - if (ap->flags & XFSMNT_NORECOVERY) { - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - cmn_err(CE_WARN, - "XFS: tried to mount a FS read-write without recovery!"); - return XFS_ERROR(EINVAL); - } - mp->m_flags |= XFS_MOUNT_NORECOVERY; - } - - if (ap->flags & XFSMNT_NOUUID) - mp->m_flags |= XFS_MOUNT_NOUUID; - if (ap->flags & XFSMNT_BARRIER) - mp->m_flags |= XFS_MOUNT_BARRIER; - else - mp->m_flags &= ~XFS_MOUNT_BARRIER; - - if (ap->flags2 & XFSMNT2_FILESTREAMS) - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - - if (ap->flags & XFSMNT_DMAPI) - mp->m_flags |= XFS_MOUNT_DMAPI; - return 0; - - - out_free_rtname: - kfree(mp->m_rtname); - out_free_fsname: - kfree(mp->m_fsname); - out: - return error; -} - /* * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in. */ STATIC int xfs_finish_flags( - struct xfs_mount_args *ap, struct xfs_mount *mp) { int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); /* Fail a mount where the logbuf is smaller then the log stripe */ if (xfs_sb_version_haslogv2(&mp->m_sb)) { - if ((ap->logbufsize <= 0) && - (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { + if (mp->m_logbsize <= 0 && + mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { mp->m_logbsize = mp->m_sb.sb_logsunit; - } else if (ap->logbufsize > 0 && - ap->logbufsize < mp->m_sb.sb_logsunit) { + } else if (mp->m_logbsize > 0 && + mp->m_logbsize < mp->m_sb.sb_logsunit) { cmn_err(CE_WARN, "XFS: logbuf size must be greater than or equal to log stripe size"); return XFS_ERROR(EINVAL); } } else { /* Fail a mount if the logbuf is larger than 32K */ - if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { + if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { cmn_err(CE_WARN, "XFS: logbuf size for version 1 logs must be 16K or 32K"); return XFS_ERROR(EINVAL); @@ -1468,7 +1372,7 @@ xfs_finish_flags( * told by noattr2 to turn it off */ if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(ap->flags & XFSMNT_NOATTR2)) + !(mp->m_flags & XFS_MOUNT_NOATTR2)) mp->m_flags |= XFS_MOUNT_ATTR2; /* @@ -1480,6 +1384,7 @@ xfs_finish_flags( return XFS_ERROR(EROFS); } +#if 0 /* shared mounts were never supported on Linux */ /* * check for shared mount. */ @@ -1502,25 +1407,11 @@ xfs_finish_flags( /* * Shared XFS V0 can't deal with DMI. Return EINVAL. */ - if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) + if (mp->m_sb.sb_shared_vn == 0 && + (mp->m_flags & XFS_MOUNT_DMAPI)) return XFS_ERROR(EINVAL); } - - if (ap->flags & XFSMNT_UQUOTA) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - if (ap->flags & XFSMNT_UQUOTAENF) - mp->m_qflags |= XFS_UQUOTA_ENFD; - } - - if (ap->flags & XFSMNT_GQUOTA) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - if (ap->flags & XFSMNT_GQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } else if (ap->flags & XFSMNT_PQUOTA) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - if (ap->flags & XFSMNT_PQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } +#endif return 0; } @@ -1533,16 +1424,12 @@ xfs_fs_fill_super( { struct inode *root; struct xfs_mount *mp = NULL; - struct xfs_mount_args *args; int flags = 0, error = ENOMEM; - - args = xfs_args_allocate(sb, silent); - if (!args) - return -ENOMEM; + char *mtpt = NULL; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) - goto out_free_args; + goto out; spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_growlock); @@ -1554,12 +1441,9 @@ xfs_fs_fill_super( mp->m_super = sb; sb->s_fs_info = mp; - if (sb->s_flags & MS_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; - - error = xfs_parseargs(mp, (char *)data, args, 0); + error = xfs_parseargs(mp, (char *)data, &mtpt); if (error) - goto out_free_mp; + goto out_free_fsname; sb_min_blocksize(sb, BBSIZE); sb->s_xattr = xfs_xattr_handlers; @@ -1567,33 +1451,28 @@ xfs_fs_fill_super( sb->s_qcop = &xfs_quotactl_operations; sb->s_op = &xfs_super_operations; - error = xfs_dmops_get(mp, args); + error = xfs_dmops_get(mp); if (error) - goto out_free_mp; - error = xfs_qmops_get(mp, args); + goto out_free_fsname; + error = xfs_qmops_get(mp); if (error) goto out_put_dmops; - if (args->flags & XFSMNT_QUIET) + if (silent) flags |= XFS_MFSI_QUIET; - error = xfs_open_devices(mp, args); + error = xfs_open_devices(mp); if (error) goto out_put_qmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; - /* - * Setup flags based on mount(2) options and then the superblock - */ - error = xfs_start_flags(args, mp); - if (error) - goto out_free_fsname; error = xfs_readsb(mp, flags); if (error) - goto out_free_fsname; - error = xfs_finish_flags(args, mp); + goto out_destroy_counters; + + error = xfs_finish_flags(mp); if (error) goto out_free_sb; @@ -1612,7 +1491,7 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; - XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); + XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); sb->s_dirt = 1; sb->s_magic = XFS_SB_MAGIC; @@ -1641,27 +1520,27 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; - xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); + kfree(mtpt); - kfree(args); + xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); return 0; out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); - out_free_fsname: - xfs_free_fsname(mp); + out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); out_put_qmops: xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); - out_free_mp: + out_free_fsname: + xfs_free_fsname(mp); + kfree(mtpt); kfree(mp); - out_free_args: - kfree(args); + out: return -error; fail_vnrele: diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index db1986a205a..5b198d15e76 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -20,7 +20,6 @@ #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_clnt.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index eea2e60b456..9556df9f7da 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -20,7 +20,6 @@ #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_clnt.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h deleted file mode 100644 index d2ce5dd70d8..00000000000 --- a/fs/xfs/xfs_clnt.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_CLNT_H__ -#define __XFS_CLNT_H__ - -/* - * XFS arguments structure, constructed from the arguments we - * are passed via the mount system call. - * - * NOTE: The mount system call is handled differently between - * Linux and IRIX. In IRIX we worked work with a binary data - * structure coming in across the syscall interface from user - * space (the mount userspace knows about each filesystem type - * and the set of valid options for it, and converts the users - * argument string into a binary structure _before_ making the - * system call), and the ABI issues that this implies. - * - * In Linux, we are passed a comma separated set of options; - * ie. a NULL terminated string of characters. Userspace mount - * code does not have any knowledge of mount options expected by - * each filesystem type and so each filesystem parses its mount - * options in kernel space. - * - * For the Linux port, we kept this structure pretty much intact - * and use it internally (because the existing code groks it). - */ -struct xfs_mount_args { - int flags; /* flags -> see XFSMNT_... macros below */ - int flags2; /* flags -> see XFSMNT2_... macros below */ - int logbufs; /* Number of log buffers, -1 to default */ - int logbufsize; /* Size of log buffers, -1 to default */ - char fsname[MAXNAMELEN+1]; /* data device name */ - char rtname[MAXNAMELEN+1]; /* realtime device filename */ - char logname[MAXNAMELEN+1]; /* journal device filename */ - char mtpt[MAXNAMELEN+1]; /* filesystem mount point */ - int sunit; /* stripe unit (BBs) */ - int swidth; /* stripe width (BBs), multiple of sunit */ - uchar_t iosizelog; /* log2 of the preferred I/O size */ - int ihashsize; /* inode hash table size (buckets) */ -}; - -/* - * XFS mount option flags -- args->flags1 - */ -#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */ -#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount - * compatible */ -#define XFSMNT_INO64 0x00000004 /* move inode numbers up - * past 2^32 */ -#define XFSMNT_UQUOTA 0x00000008 /* user quota accounting */ -#define XFSMNT_PQUOTA 0x00000010 /* IRIX prj quota accounting */ -#define XFSMNT_UQUOTAENF 0x00000020 /* user quota limit - * enforcement */ -#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit - * enforcement */ -#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */ -#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at - * stripe boundaries*/ -#define XFSMNT_RETERR 0x00000400 /* return error to user */ -#define XFSMNT_NORECOVERY 0x00000800 /* no recovery, implies - * read-only mount */ -#define XFSMNT_SHARED 0x00001000 /* shared XFS mount */ -#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */ -#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */ - /* (osyncisdsync is default) */ -#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */ -#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32 - * bits of address space */ -#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */ -#define XFSMNT_GQUOTAENF 0x00800000 /* group quota limit - * enforcement */ -#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */ -#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */ -#define XFSMNT_BARRIER 0x04000000 /* use write barriers */ -#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */ -#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width - * allocation */ -#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename - * symlink,mkdir,rmdir,mknod */ -#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */ - -/* - * XFS mount option flags -- args->flags2 - */ -#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred - * I/O size in stat(2) */ -#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams - * allocator */ - -#endif /* __XFS_CLNT_H__ */ diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c index a1e55fb9d5d..e71e2581c0c 100644 --- a/fs/xfs/xfs_dmops.c +++ b/fs/xfs/xfs_dmops.c @@ -25,7 +25,6 @@ #include "xfs_inum.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_clnt.h" static struct xfs_dmops xfs_dmcore_stub = { @@ -38,9 +37,9 @@ static struct xfs_dmops xfs_dmcore_stub = { }; int -xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) +xfs_dmops_get(struct xfs_mount *mp) { - if (args->flags & XFSMNT_DMAPI) { + if (mp->m_flags & XFS_MOUNT_DMAPI) { cmn_err(CE_WARN, "XFS: dmapi support not available in this kernel."); return EINVAL; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d3b75257602..e3f618c84e4 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -515,9 +515,9 @@ extern void xfs_freesb(xfs_mount_t *); extern int xfs_fs_writable(xfs_mount_t *); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); -extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); +extern int xfs_dmops_get(struct xfs_mount *); extern void xfs_dmops_put(struct xfs_mount *); -extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); +extern int xfs_qmops_get(struct xfs_mount *); extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index a294e58db8d..27f80581520 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -28,7 +28,6 @@ #include "xfs_mount.h" #include "xfs_quota.h" #include "xfs_error.h" -#include "xfs_clnt.h" STATIC struct xfs_dquot * @@ -131,9 +130,9 @@ static struct xfs_qmops xfs_qmcore_stub = { }; int -xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) +xfs_qmops_get(struct xfs_mount *mp) { - if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) { + if (XFS_IS_QUOTA_RUNNING(mp)) { #ifdef CONFIG_XFS_QUOTA mp->m_qm_ops = &xfs_qmcore_xfs; #else diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 883dd0f68e9..305d9f3948e 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -49,7 +49,6 @@ #include "xfs_extfree_item.h" #include "xfs_acl.h" #include "xfs_attr.h" -#include "xfs_clnt.h" #include "xfs_mru_cache.h" #include "xfs_filestream.h" #include "xfs_fsops.h" -- cgit v1.2.3 From 469fc23d5dd172665c69099db776c39bb1b70f92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:54:57 +1100 Subject: [XFS] fix the noquota mount option Noquota should clear all mount options, and not just user and group quota. Probably doesn't matter very much in real life. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32372a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 5638a99fb74..62d48588849 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -355,6 +355,7 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || -- cgit v1.2.3 From 1ec7944beb6f3c29f1e58a66422130133727e9e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:55:08 +1100 Subject: [XFS] fix biosize option iosizelog shouldn't be the same as iosize but the logarithm of it. Then again the current biosize option doesn't make much sense to me as it doesn't set the preferred I/O size as mentioned in the comment next to it but rather the allocation size and thus is identical to the allocsize option (except for the missing logarithm). It's also not documented in Documentation/filesystems/xfs.txt or the mount manpage. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32373a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 62d48588849..c3d004bc462 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -270,7 +270,7 @@ xfs_parseargs( return EINVAL; } iosize = simple_strtoul(value, &eov, 10); - iosizelog = (uint8_t) iosize; + iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, -- cgit v1.2.3 From 2b7035fd7473c799ca3372092d72c768c7db329d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:55:18 +1100 Subject: [XFS] Trivial xfs_remove comment fixup The dp to ip comment should be for the unconditional xfs_droplink call, and the "." link obviously only exists for directories, so it should be in the is_dir conditional. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32374a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_vnodeops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1d15a320b9a..1c890113ab3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2009,7 +2009,7 @@ xfs_remove( goto out_bmap_cancel; /* - * Drop the link from dp to ip. + * Drop the "." link from ip to self. */ error = xfs_droplink(tp, ip); if (error) @@ -2024,7 +2024,7 @@ xfs_remove( } /* - * Drop the "." link from ip to self. + * Drop the link from dp to ip. */ error = xfs_droplink(tp, ip); if (error) -- cgit v1.2.3 From 56e73ec47d749047f441e6b9d60d964535d31c3b Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:55:27 +1100 Subject: [XFS] Can't lock inodes in radix tree preload region When we are inside a radix tree preload region, we cannot sleep. Recently we moved the inode locking inside the preload region for the inode radix tree. Fix that, and fix a missed unlock in another error path in the same code at the same time. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32385a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_iget.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index a1f209b0596..377c0cd1499 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -159,18 +159,19 @@ xfs_iget_cache_miss( goto out_destroy; } + if (lock_flags) + xfs_ilock(ip, lock_flags); + /* * Preload the radix tree so we can insert safely under the - * write spinlock. + * write spinlock. Note that we cannot sleep inside the preload + * region. */ if (radix_tree_preload(GFP_KERNEL)) { error = EAGAIN; - goto out_destroy; + goto out_unlock; } - if (lock_flags) - xfs_ilock(ip, lock_flags); - mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); @@ -181,7 +182,7 @@ xfs_iget_cache_miss( WARN_ON(error != -EEXIST); XFS_STATS_INC(xs_ig_dup); error = EAGAIN; - goto out_unlock; + goto out_preload_end; } /* These values _must_ be set before releasing the radix tree lock! */ @@ -193,9 +194,12 @@ xfs_iget_cache_miss( *ipp = ip; return 0; -out_unlock: +out_preload_end: write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); +out_unlock: + if (lock_flags) + xfs_iunlock(ip, lock_flags); out_destroy: xfs_idestroy(ip); return error; -- cgit v1.2.3 From 455486b9ccdd0a1d7432a03302f549b1c917c181 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 18:03:14 +1100 Subject: [XFS] avoid all reclaimable inodes in xfs_sync_inodes_ag If we are syncing data in xfs_sync_inodes_ag(), the VFS inode must still be referencable as the dirty data state is carried on the VFS inode. hence if we can't get a reference via igrab(), the inode must be in reclaim which implies that it has no dirty data attached. Leave such inodes to the reclaim code to flush the dirty inode state to disk and so avoid attempting to access the VFS inode when it may not exist in xfs_sync_inodes_ag(). Version 4: o don't reference linux inode until after igrab() succeeds Version 3: o converted unlock/rele to an xfs_iput() call. Version 2: o change igrab logic to be more linear o remove initial reclaimable inode check now that we are using igrab() failure to find reclaimable inodes o assert that igrab failure occurs only on reclaimable inodes o clean up inode locking - only grab the iolock if we are doing a SYNC_DELWRI call and we have a dirty inode. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32391a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Peter Leckie Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_sync.c | 75 +++++++++++---------------------------------- 1 file changed, 18 insertions(+), 57 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ee1648b179f..fb5cca3df84 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -63,25 +63,16 @@ xfs_sync_inodes_ag( int error = 0; int last_error = 0; int fflag = XFS_B_ASYNC; - int lock_flags = XFS_ILOCK_SHARED; if (flags & SYNC_DELWRI) fflag = XFS_B_DELWRI; if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ - if (flags & SYNC_DELWRI) { - /* - * We need the I/O lock if we're going to call any of - * the flush/inval routines. - */ - lock_flags |= XFS_IOLOCK_SHARED; - } - do { struct inode *inode; - boolean_t inode_refed; xfs_inode_t *ip = NULL; + int lock_flags = XFS_ILOCK_SHARED; /* * use a gang lookup to find the next inode in the tree @@ -109,22 +100,6 @@ xfs_sync_inodes_ag( break; } - /* - * skip inodes in reclaim. Let xfs_syncsub do that for - * us so we don't need to worry. - */ - if (xfs_iflags_test(ip, (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { - read_unlock(&pag->pag_ici_lock); - continue; - } - - /* bad inodes are dealt with elsewhere */ - inode = VFS_I(ip); - if (is_bad_inode(inode)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(mp)) { read_unlock(&pag->pag_ici_lock); @@ -132,42 +107,34 @@ xfs_sync_inodes_ag( } /* - * If we can't get a reference on the VFS_I, the inode must be - * in reclaim. If we can get the inode lock without blocking, - * it is safe to flush the inode because we hold the tree lock - * and xfs_iextract will block right now. Hence if we lock the - * inode while holding the tree lock, xfs_ireclaim() is - * guaranteed to block on the inode lock we now hold and hence - * it is safe to reference the inode until we drop the inode - * locks completely. + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. */ - inode_refed = B_FALSE; - if (igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - xfs_ilock(ip, lock_flags); - inode_refed = B_TRUE; - } else { - if (!xfs_ilock_nowait(ip, lock_flags)) { - /* leave it to reclaim */ - read_unlock(&pag->pag_ici_lock); - continue; - } + inode = VFS_I(ip); + if (!igrab(inode)) { read_unlock(&pag->pag_ici_lock); + continue; + } + read_unlock(&pag->pag_ici_lock); + + /* bad inodes are dealt with elsewhere */ + if (is_bad_inode(inode)) { + IRELE(ip); + continue; } /* * If we have to flush data or wait for I/O completion - * we need to drop the ilock that we currently hold. - * If we need to drop the lock, insert a marker if we - * have not already done so. + * we need to hold the iolock. */ if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) vn_iowait(ip); - xfs_ilock(ip, XFS_ILOCK_SHARED); } + xfs_ilock(ip, XFS_ILOCK_SHARED); if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { if (flags & SYNC_WAIT) { @@ -183,13 +150,7 @@ xfs_sync_inodes_ag( xfs_ifunlock(ip); } } - - if (lock_flags) - xfs_iunlock(ip, lock_flags); - - if (inode_refed) { - IRELE(ip); - } + xfs_iput(ip, lock_flags); if (error) last_error = error; -- cgit v1.2.3 From c679eef0520eb3c2c731fce505e61b8ef9469aac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:04:13 +1100 Subject: [XFS] stop using xfs_itobp in xfs_bulkstat xfs_bulkstat only wants the dinode, offset and buffer from a given inode number. Instead of using xfs_itobp on a fake inode which is complicated and currently leads to leaks of the security data just use xfs_inotobp which is designed to do exactly the kind of lookup xfs_bulkstat wants. The only thing that's missing in xfs_inotobp is a flags paramter that let's us pass down XFS_IMAP_BULKSTAT, but that can easily added. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32397a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner --- fs/xfs/xfs_inode.c | 13 +++++++------ fs/xfs/xfs_inode.h | 6 ++++-- fs/xfs/xfs_itable.c | 21 ++++++++------------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c83f6998f95..35e419191ab 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -222,25 +222,26 @@ xfs_imap_to_bp( * Use xfs_imap() to determine the size and location of the * buffer to read from disk. */ -STATIC int +int xfs_inotobp( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, xfs_dinode_t **dipp, xfs_buf_t **bpp, - int *offset) + int *offset, + uint imap_flags) { xfs_imap_t imap; xfs_buf_t *bp; int error; imap.im_blkno = 0; - error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); + error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP); if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); if (error) return error; @@ -792,7 +793,7 @@ xfs_dic2xflags( /* * Allocate and initialise an xfs_inode. */ -struct xfs_inode * +STATIC struct xfs_inode * xfs_inode_alloc( struct xfs_mount *mp, xfs_ino_t ino) @@ -2046,7 +2047,7 @@ xfs_iunlink_remove( } next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); error = xfs_inotobp(mp, tp, next_ino, &last_dip, - &last_ibp, &last_offset); + &last_ibp, &last_offset, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a5aeb9cfeae..5d12cfeb43c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -158,7 +158,7 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). + * Flags for xfs_inotobp, xfs_itobp(), xfs_imap() and xfs_dilocate(). */ #define XFS_IMAP_LOOKUP 0x1 #define XFS_IMAP_BULKSTAT 0x2 @@ -514,7 +514,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); -struct xfs_inode * xfs_inode_alloc(struct xfs_mount *, xfs_ino_t); void xfs_idestroy(xfs_inode_t *); void xfs_iextract(xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); @@ -531,6 +530,9 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); #endif /* __KERNEL__ */ +int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, + xfs_ino_t, struct xfs_dinode **, + struct xfs_buf **, int *, uint); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, xfs_daddr_t, uint, uint); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 42a214b8df9..35118032a5d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -359,7 +359,6 @@ xfs_bulkstat( int ubused; /* bytes used by formatter */ xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ xfs_dinode_t *dip; /* ptr into bp for specific inode */ - xfs_inode_t *ip; /* ptr to in-core inode struct */ /* * Get the last inode value, see if there's nothing to do. @@ -585,6 +584,8 @@ xfs_bulkstat( if (flags & (BULKSTAT_FG_QUICK | BULKSTAT_FG_INLINE)) { + int offset; + ino = XFS_AGINO_TO_INO(mp, agno, agino); bno = XFS_AGB_TO_DADDR(mp, agno, @@ -595,19 +596,13 @@ xfs_bulkstat( */ if (bp) xfs_buf_relse(bp); - ip = xfs_inode_alloc(mp, ino); - if (!ip) { - bp = NULL; - rval = ENOMEM; - break; - } - error = xfs_itobp(mp, NULL, ip, - &dip, &bp, bno, - XFS_IMAP_BULKSTAT, - XFS_BUF_LOCK); + + error = xfs_inotobp(mp, NULL, ino, &dip, + &bp, &offset, + XFS_IMAP_BULKSTAT); + if (!error) - clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; - xfs_idestroy(ip); + clustidx = offset / mp->m_sb.sb_inodesize; if (XFS_TEST_ERROR(error != 0, mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, XFS_RANDOM_BULKSTAT_READ_CHUNK)) { -- cgit v1.2.3 From 087e3b0460c367d0f4a5b71d7b013968ae23b588 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:24:37 +1100 Subject: Inode: export symbol destroy_inode To make sure we free the security data inodes need to be freed using the proper VFS helper (which we also need to export for this). We mark these inodes bad so we can skip the flush path for them. Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner --- fs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index fbcf6c5e760..f84ba338faf 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -212,6 +212,7 @@ void destroy_inode(struct inode *inode) else kmem_cache_free(inode_cachep, (inode)); } +EXPORT_SYMBOL(destroy_inode); /* -- cgit v1.2.3 From 9ed0451ee0a13469f7b38e4ced8974036f6d114f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:26:04 +1100 Subject: [XFS] free partially initialized inodes using destroy_inode To make sure we free the security data inodes need to be freed using the proper VFS helper (which we also need to export for this). We mark these inodes bad so we can skip the flush path for them. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32398a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner --- fs/xfs/xfs_iget.c | 2 +- fs/xfs/xfs_inode.c | 21 +++++++++++---------- fs/xfs/xfs_inode.h | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 377c0cd1499..837cae78153 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -201,7 +201,7 @@ out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); out_destroy: - xfs_idestroy(ip); + xfs_destroy_inode(ip); return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 35e419191ab..cd522827f99 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -898,18 +898,14 @@ xfs_iread( * know that this is a new incore inode. */ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); - if (error) { - xfs_idestroy(ip); - return error; - } + if (error) + goto out_destroy_inode; /* * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { - xfs_idestroy(ip); - xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " "dip->di_core.di_magic (0x%x) != " @@ -917,7 +913,8 @@ xfs_iread( be16_to_cpu(dip->di_core.di_magic), XFS_DINODE_MAGIC); #endif /* DEBUG */ - return XFS_ERROR(EINVAL); + error = XFS_ERROR(EINVAL); + goto out_brelse; } /* @@ -931,14 +928,12 @@ xfs_iread( xfs_dinode_from_disk(&ip->i_d, &dip->di_core); error = xfs_iformat(ip, dip); if (error) { - xfs_idestroy(ip); - xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " "xfs_iformat() returned error %d", error); #endif /* DEBUG */ - return error; + goto out_brelse; } } else { ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); @@ -1004,6 +999,12 @@ xfs_iread( xfs_trans_brelse(tp, bp); *ipp = ip; return 0; + + out_brelse: + xfs_trans_brelse(tp, bp); + out_destroy_inode: + xfs_destroy_inode(ip); + return error; } /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 5d12cfeb43c..7f007ef4bbb 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -309,6 +309,23 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) return &ip->i_vnode; } +/* + * Get rid of a partially initialized inode. + * + * We have to go through destroy_inode to make sure allocations + * from init_inode_always like the security data are undone. + * + * We mark the inode bad so that it takes the short cut in + * the reclaim path instead of going through the flush path + * which doesn't make sense for an inode that has never seen the + * light of day. + */ +static inline void xfs_destroy_inode(struct xfs_inode *ip) +{ + make_bad_inode(VFS_I(ip)); + return destroy_inode(VFS_I(ip)); +} + /* * i_flags helper functions */ -- cgit v1.2.3 From 7ee49acfe54883f16d28d9486b789431a5804d18 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 18:26:51 +1100 Subject: [XFS] correctly select first log item to push Under heavy metadata load we are seeing log hangs. The AIL has items in it ready to be pushed, and they are within the push target window. However, we are not pushing them when the last pushed LSN is less than the LSN of the first log item on the AIL. This is a regression introduced by the AIL push cursor modifications. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32409a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Tim Shimmin --- fs/xfs/xfs_trans_ail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 67ee4663336..2d47f10f8be 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -228,7 +228,7 @@ xfs_trans_ail_cursor_first( list_for_each_entry(lip, &ailp->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) - break; + goto out; } lip = NULL; out: -- cgit v1.2.3 From ea5a3dc8356bf1cf27bab9a5a0da5dfbbb82013d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:27:48 +1100 Subject: [XFS] kill sys_cred capable_cred has been unused for a while so we can kill it and sys_cred. That also means the cred argument to xfs_setattr and xfs_change_file_space can be removed now. SGI-PV: 988918 SGI-Modid: xfs-linux-melb:xfs-kern:32412a Signed-off-by: Christoph Hellwig Signed-off-by: Tim Shimmin Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_cred.h | 8 -------- fs/xfs/linux-2.6/xfs_globals.c | 7 ------- fs/xfs/linux-2.6/xfs_globals.h | 1 - fs/xfs/linux-2.6/xfs_ioctl.c | 3 +-- fs/xfs/linux-2.6/xfs_iops.c | 6 +++--- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_vnodeops.c | 6 ++---- fs/xfs/xfs_vnodeops.h | 6 ++---- 8 files changed, 9 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 652721ce0ea..98da2199bc2 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -27,12 +27,4 @@ typedef struct cred { /* EMPTY */ } cred_t; -extern struct cred *sys_cred; - -/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ -static inline int capable_cred(cred_t *cr, int cid) -{ - return (cr == sys_cred) ? 1 : capable(cid); -} - #endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index ef90e64641e..46e862b004e 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -43,10 +43,3 @@ xfs_param_t xfs_params = { .inherit_nodfrg = { 0, 1, 1 }, .fstrm_timer = { 1, 30*100, 3600*100}, }; - -/* - * Global system credential structure. - */ -static cred_t sys_cred_val; -cred_t *sys_cred = &sys_cred_val; - diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee..69f71caf061 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,5 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d3438c72dca..b5ea3f2afdc 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -691,8 +691,7 @@ xfs_ioc_space( if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; - error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, - NULL, attr_flags); + error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags); return -error; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 37bb1012aff..f78bc221576 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -601,7 +601,7 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } /* @@ -642,7 +642,7 @@ xfs_vn_fallocate( xfs_ilock(ip, XFS_IOLOCK_EXCL); error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, NULL, XFS_ATTR_NOLOCK); + 0, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; @@ -653,7 +653,7 @@ xfs_vn_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); + error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b2f639a1416..8b3d1bdeb44 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -758,7 +758,7 @@ xfs_acl_setmode( if (gap && nomask) iattr.ia_mode |= gap->ae_perm << 3; - return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); + return xfs_setattr(XFS_I(vp), &iattr, 0); } /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1c890113ab3..34a1982ed6d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -79,8 +79,7 @@ int xfs_setattr( struct xfs_inode *ip, struct iattr *iattr, - int flags, - cred_t *credp) + int flags) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -3362,7 +3361,6 @@ xfs_change_file_space( int cmd, xfs_flock64_t *bf, xfs_off_t offset, - cred_t *credp, int attr_flags) { xfs_mount_t *mp = ip->i_mount; @@ -3450,7 +3448,7 @@ xfs_change_file_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr(ip, &iattr, attr_flags, credp); + error = xfs_setattr(ip, &iattr, attr_flags); if (error) return error; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96bec5..b1ae8e3f404 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -15,8 +15,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); -int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); +int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -44,8 +43,7 @@ int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, - xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); -- cgit v1.2.3 From e0b8e8b65d578f5d5538465dff8392cf02e1cc5d Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Thu, 30 Oct 2008 18:30:48 +1100 Subject: [XFS] remove restricted chown parameter from xfs linux On Linux all filesystems are supposed to be operating under Posix' restricted chown. Restricted chown means it restricts chown to the owner unless you have CAP_FOWNER. NOTE: that 2 files outside of fs/xfs have been modified too for this change. Reviewed-by: Dave Chinner SGI-PV: 988919 SGI-Modid: xfs-linux-melb:xfs-kern:32413a Signed-off-by: Tim Shimmin Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_globals.c | 1 - fs/xfs/linux-2.6/xfs_ioctl.c | 4 ---- fs/xfs/linux-2.6/xfs_linux.h | 1 - fs/xfs/linux-2.6/xfs_sysctl.c | 11 ----------- fs/xfs/linux-2.6/xfs_sysctl.h | 3 +-- fs/xfs/xfs_vnodeops.c | 13 ++----------- 6 files changed, 3 insertions(+), 30 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 46e862b004e..2ae8b1ccb02 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -26,7 +26,6 @@ */ xfs_param_t xfs_params = { /* MIN DFLT MAX */ - .restrict_chown = { 0, 1, 1 }, .sgid_inherit = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 }, .panic_mask = { 0, 0, 255 }, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index b5ea3f2afdc..d25694e8cd6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1103,10 +1103,6 @@ xfs_ioctl_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & FSX_PROJID) { /* diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index cc0f7b3a979..214717650b2 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -107,7 +107,6 @@ #undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #endif -#define restricted_chown xfs_params.restrict_chown.val #define irix_sgid_inherit xfs_params.sgid_inherit.val #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7dacb5bbde3..916c0ffb608 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -55,17 +55,6 @@ xfs_stats_clear_proc_handler( #endif /* CONFIG_PROC_FS */ static ctl_table xfs_table[] = { - { - .ctl_name = XFS_RESTRICT_CHOWN, - .procname = "restrict_chown", - .data = &xfs_params.restrict_chown.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xfs_params.restrict_chown.min, - .extra2 = &xfs_params.restrict_chown.max - }, { .ctl_name = XFS_SGID_INHERIT, .procname = "irix_sgid_inherit", diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index 4aadb8056c3..b9937d450f8 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val { } xfs_sysctl_val_t; typedef struct xfs_param { - xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is * not a member of parent dir GID. */ xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ @@ -68,7 +67,7 @@ typedef struct xfs_param { enum { /* XFS_REFCACHE_SIZE = 1 */ /* XFS_REFCACHE_PURGE = 2 */ - XFS_RESTRICT_CHOWN = 3, + /* XFS_RESTRICT_CHOWN = 3 */ XFS_SGID_INHERIT = 4, XFS_SYMLINK_MODE = 5, XFS_PANIC_MASK = 6, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 34a1982ed6d..c45ea278ef4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -232,10 +232,6 @@ xfs_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & (ATTR_UID|ATTR_GID)) { /* @@ -259,9 +255,8 @@ xfs_setattr( * shall be equal to either the group ID or one of the * supplementary group IDs of the calling process. */ - if (restricted_chown && - (iuid != uid || (igid != gid && - !in_group_p((gid_t)gid))) && + if ((iuid != uid || + (igid != gid && !in_group_p((gid_t)gid))) && !capable(CAP_CHOWN)) { code = XFS_ERROR(EPERM); goto error_return; @@ -455,10 +450,6 @@ xfs_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & (ATTR_UID|ATTR_GID)) { /* -- cgit v1.2.3 From 6bfb3d065f4c498c17a3a07f3dc08cedff53aff4 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 18:32:43 +1100 Subject: [XFS] Fix race when looking up reclaimable inodes If we get a race looking up a reclaimable inode, we can end up with the winner proceeding to use the inode before it has been completely re-initialised. This is a Bad Thing. Fix the race by checking whether we are still initialising the inod eonce we have a reference to it, and if so wait for the initialisation to complete before continuing. While there, fix a leaked reference count in the same code when encountering an unlinked inode and we are not doing a lookup for a create operation. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32429a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_linux.h | 1 + fs/xfs/xfs_iget.c | 32 ++++++++++++++++++++++---------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 214717650b2..77d6ddcaf54 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -77,6 +77,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 837cae78153..bf4dc5eb4cf 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -52,7 +52,7 @@ xfs_iget_cache_hit( int lock_flags) __releases(pag->pag_ici_lock) { struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; /* * If INEW is set this inode is being set up @@ -60,7 +60,6 @@ xfs_iget_cache_hit( * Pause and try again. */ if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { - error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; } @@ -73,7 +72,6 @@ xfs_iget_cache_hit( * error immediately so we don't remove it from the reclaim * list and potentially leak the inode. */ - if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { error = ENOENT; goto out_error; @@ -91,27 +89,42 @@ xfs_iget_cache_hit( error = ENOMEM; goto out_error; } + + /* + * We must set the XFS_INEW flag before clearing the + * XFS_IRECLAIMABLE flag so that if a racing lookup does + * not find the XFS_IRECLAIMABLE above but has the igrab() + * below succeed we can safely check XFS_INEW to detect + * that this inode is still being initialised. + */ xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); /* clear the radix tree reclaim flag as well. */ __xfs_inode_clear_reclaim_tag(mp, pag, ip); - read_unlock(&pag->pag_ici_lock); } else if (!igrab(VFS_I(ip))) { /* If the VFS inode is being torn down, pause and try again. */ - error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; - } else { - /* we've got a live one */ - read_unlock(&pag->pag_ici_lock); + } else if (xfs_iflags_test(ip, XFS_INEW)) { + /* + * We are racing with another cache hit that is + * currently recycling this inode out of the XFS_IRECLAIMABLE + * state. Wait for the initialisation to complete before + * continuing. + */ + wait_on_inode(VFS_I(ip)); } if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { error = ENOENT; - goto out; + iput(VFS_I(ip)); + goto out_error; } + /* We've got a live one. */ + read_unlock(&pag->pag_ici_lock); + if (lock_flags != 0) xfs_ilock(ip, lock_flags); @@ -122,7 +135,6 @@ xfs_iget_cache_hit( out_error: read_unlock(&pag->pag_ici_lock); -out: return error; } -- cgit v1.2.3 From 91b777125175077fb74025608dba87f100586c62 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Oct 2008 15:50:04 +1100 Subject: CRED: Wrap task credential accesses in the XFS filesystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn --- fs/xfs/linux-2.6/xfs_cred.h | 2 +- fs/xfs/linux-2.6/xfs_ioctl.c | 2 +- fs/xfs/xfs_acl.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 98da2199bc2..e279d00779f 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -24,7 +24,7 @@ * Credentials */ typedef struct cred { - /* EMPTY */ + /* EMPTY */ } cred_t; #endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d25694e8cd6..f1bd6c36e6f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1006,7 +1006,7 @@ xfs_ioctl_setattr( * to the file owner ID, except in cases where the * CAP_FSETID capability is applicable. */ - if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) { + if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { code = XFS_ERROR(EPERM); goto error_return; } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8b3d1bdeb44..a8cdd73999a 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -366,7 +366,7 @@ xfs_acl_allow_set( return ENOTDIR; if (vp->i_sb->s_flags & MS_RDONLY) return EROFS; - if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) + if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER)) return EPERM; return 0; } @@ -413,13 +413,13 @@ xfs_acl_access( switch (fap->acl_entry[i].ae_tag) { case ACL_USER_OBJ: seen_userobj = 1; - if (fuid != current->fsuid) + if (fuid != current_fsuid()) continue; matched.ae_tag = ACL_USER_OBJ; matched.ae_perm = allows; break; case ACL_USER: - if (fap->acl_entry[i].ae_id != current->fsuid) + if (fap->acl_entry[i].ae_id != current_fsuid()) continue; matched.ae_tag = ACL_USER; matched.ae_perm = allows; -- cgit v1.2.3 From 644c3567d16b7e53cf52ae98c4150d601c9eacfe Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 16:50:24 +1100 Subject: [XFS] handle memory allocation failures during log initialisation When there is no memory left in the system, xfs_buf_get_noaddr() can fail. If this happens at mount time during xlog_alloc_log() we fail to catch the error and oops. Catch the error from xfs_buf_get_noaddr(), and allow other memory allocations to fail and catch those errors too. Report the error to the console and fail the mount with ENOMEM. Tested by manually injecting errors into xfs_buf_get_noaddr() and xlog_alloc_log(). Version 2: o remove unnecessary casts of the returned pointer from kmem_zalloc() SGI-PV: 987246 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_log.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 51840170b16..92c20a8d9e6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -563,6 +563,11 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; + goto out; + } /* * Initialize the AIL now we have a log. @@ -601,6 +606,7 @@ xfs_log_mount( return 0; error: xfs_log_unmount_dealloc(mp); +out: return error; } /* xfs_log_mount */ @@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp, int i; int iclogsize; - log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); + log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); @@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp, iclogsize = log->l_iclog_size; ASSERT(log->l_iclog_size >= 4096); for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = (xlog_in_core_t *) - kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); + *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); + if (!*iclogp) + goto out_free_iclog; + iclog = *iclogp; iclog->ic_prev = prev_iclog; prev_iclog = iclog; bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); @@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ return log; + +out_free_iclog: + for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { + prev_iclog = iclog->ic_next; + if (iclog->ic_bp) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); + xfs_buf_free(iclog->ic_bp); + xlog_trace_iclog_dealloc(iclog); + } + kmem_free(iclog); + } + spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); + xlog_trace_loggrant_dealloc(log); + xfs_buf_free(log->l_xbuf); +out_free_log: + kmem_free(log); + return NULL; } /* xlog_alloc_log */ -- cgit v1.2.3 From d44dab8d1cde8aeba1faf44a7654f90800feb7fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 10 Nov 2008 17:06:05 +1100 Subject: fs: xfs needs inode_wait to be exported Since wait_on_inode() references it. Signed-off-by: Stephen Rothwell Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index f84ba338faf..098a2443196 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1334,6 +1334,7 @@ int inode_wait(void *word) schedule(); return 0; } +EXPORT_SYMBOL(inode_wait); /* * If we try to find an inode in the inode hash while it is being -- cgit v1.2.3 From cb4f0d1d4229f609f43c68acec69c7618ed72397 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 17:11:18 +1100 Subject: [XFS] fix uninitialised variable bug in dquot release gcc on ARM warns about an using an uninitialised variable in xfs_qm_dqrele_all_inodes(). This is a real bug, but gcc on x86_64 is not reporting this warning so it went unnoticed. Fix the bug by bring the inode radix tree walk code up to date with xfs_sync_inodes_ag(). SGI-PV: 987246 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/quota/xfs_qm_syscalls.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 9ff28e6c5b8..2c57a6eaff3 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1036,9 +1036,6 @@ xfs_qm_dqrele_inodes_ag( int nr_found; do { - boolean_t inode_refed; - struct inode *inode; - /* * use a gang lookup to find the next inode in the tree * as the tree is sparse and a gang lookup walks to find @@ -1053,27 +1050,37 @@ xfs_qm_dqrele_inodes_ag( break; } - /* update the index for the next lookup */ + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } - /* skip quota inodes and those in reclaim */ - inode = VFS_I(ip); - if (!inode || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + /* skip quota inodes */ + if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); read_unlock(&pag->pag_ici_lock); continue; } - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - inode = igrab(inode); - read_unlock(&pag->pag_ici_lock); - if (!inode) - continue; - inode_refed = B_TRUE; - xfs_ilock(ip, XFS_ILOCK_EXCL); - } else { + + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. + */ + if (!igrab(VFS_I(ip))) { read_unlock(&pag->pag_ici_lock); + continue; } + read_unlock(&pag->pag_ici_lock); + + xfs_ilock(ip, XFS_ILOCK_EXCL); if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; @@ -1083,9 +1090,8 @@ xfs_qm_dqrele_inodes_ag( xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (inode_refed) - IRELE(ip); + xfs_iput(ip, XFS_ILOCK_EXCL); + } while (nr_found); } -- cgit v1.2.3 From 6307091fe69ae74747298bdcaf43119ad67bda3a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 17:13:23 +1100 Subject: [XFS] Avoid using inodes that haven't been completely initialised The radix tree walks in xfs_sync_inodes_ag and xfs_qm_dqrele_all_inodes() can find inodes that are still undergoing initialisation. Avoid them by checking for the the XFS_INEW() flag once we have a reference on the inode. This flag is cleared once the inode is properly initialised. SGI-PV: 987246 Signed-off-by: Dave Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_iops.c | 1 - fs/xfs/linux-2.6/xfs_sync.c | 5 +++-- fs/xfs/quota/xfs_qm_syscalls.c | 6 ++++++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f78bc221576..69c98cf3233 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -780,7 +780,6 @@ xfs_setup_inode( inode->i_ino = ip->i_ino; inode->i_state = I_NEW|I_LOCK; inode_add_to_lists(ip->i_mount->m_super, inode); - ASSERT(atomic_read(&inode->i_count) == 1); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index fb5cca3df84..d12d31b86fa 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -117,8 +117,9 @@ xfs_sync_inodes_ag( } read_unlock(&pag->pag_ici_lock); - /* bad inodes are dealt with elsewhere */ - if (is_bad_inode(inode)) { + /* avoid new or bad inodes */ + if (is_bad_inode(inode) || + xfs_iflags_test(ip, XFS_INEW)) { IRELE(ip); continue; } diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 2c57a6eaff3..68139b38aed 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1080,6 +1080,12 @@ xfs_qm_dqrele_inodes_ag( } read_unlock(&pag->pag_ici_lock); + /* avoid new inodes though we shouldn't find any here */ + if (xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + continue; + } + xfs_ilock(ip, XFS_ILOCK_EXCL); if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); -- cgit v1.2.3 From cc09c0dc57de7f7d2ed89d480b5653e5f6a32f2c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 17 Nov 2008 17:37:10 +1100 Subject: [XFS] Fix double free of log tickets When an I/O error occurs during an intermediate commit on a rolling transaction, xfs_trans_commit() will free the transaction structure and the related ticket. However, the duplicate transaction that gets used as the transaction continues still contains a pointer to the ticket. Hence when the duplicate transaction is cancelled and freed, we free the ticket a second time. Add reference counting to the ticket so that we hold an extra reference to the ticket over the transaction commit. We drop the extra reference once we have checked that the transaction commit did not return an error, thus avoiding a double free on commit error. Credit to Nick Piggin for tripping over the problem. SGI-PV: 989741 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_bmap.c | 10 ++++++++-- fs/xfs/xfs_inode.c | 10 ++++++++-- fs/xfs/xfs_log.c | 39 +++++++++++++++++++++++++-------------- fs/xfs/xfs_log.h | 4 ++++ fs/xfs/xfs_log_priv.h | 1 + fs/xfs/xfs_trans.c | 9 ++++++++- fs/xfs/xfs_utils.c | 6 ++++++ fs/xfs/xfs_vnodeops.c | 6 ++++++ 8 files changed, 66 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index db289050692..c3912213645 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4292,9 +4292,15 @@ xfs_bmap_finish( * We have a new transaction, so we should return committed=1, * even though we're returning an error. */ - if (error) { + if (error) return error; - } + + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(ntp->t_ticket); + if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES, logcount))) return error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cd522827f99..b9771004706 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1782,8 +1782,14 @@ xfs_itruncate_finish( xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ihold(ntp, ip); - if (!error) - error = xfs_trans_reserve(ntp, 0, + if (error) + return error; + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(ntp->t_ticket); + error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 92c20a8d9e6..4bf44aef644 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -100,12 +100,11 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, /* local ticket functions */ -STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, +STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, int unit_bytes, int count, char clientid, uint flags); -STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket); #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); @@ -360,7 +359,7 @@ xfs_log_done(xfs_mount_t *mp, */ xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); - xlog_ticket_put(log, ticket); + xfs_log_ticket_put(ticket); } else { xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); @@ -514,7 +513,7 @@ xfs_log_reserve(xfs_mount_t *mp, retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ - internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, + internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, client, flags); if (!internal_ticket) return XFS_ERROR(ENOMEM); @@ -749,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (tic) { xlog_trace_loggrant(log, tic, "unmount rec"); xlog_ungrant_log_space(log, tic); - xlog_ticket_put(log, tic); + xfs_log_ticket_put(tic); } } else { /* @@ -3222,22 +3221,33 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) */ /* - * Free a used ticket. + * Free a used ticket when it's refcount falls to zero. */ -STATIC void -xlog_ticket_put(xlog_t *log, - xlog_ticket_t *ticket) +void +xfs_log_ticket_put( + xlog_ticket_t *ticket) { - sv_destroy(&ticket->t_wait); - kmem_zone_free(xfs_log_ticket_zone, ticket); -} /* xlog_ticket_put */ + ASSERT(atomic_read(&ticket->t_ref) > 0); + if (atomic_dec_and_test(&ticket->t_ref)) { + sv_destroy(&ticket->t_wait); + kmem_zone_free(xfs_log_ticket_zone, ticket); + } +} +xlog_ticket_t * +xfs_log_ticket_get( + xlog_ticket_t *ticket) +{ + ASSERT(atomic_read(&ticket->t_ref) > 0); + atomic_inc(&ticket->t_ref); + return ticket; +} /* * Allocate and initialise a new log ticket. */ STATIC xlog_ticket_t * -xlog_ticket_get(xlog_t *log, +xlog_ticket_alloc(xlog_t *log, int unit_bytes, int cnt, char client, @@ -3308,6 +3318,7 @@ xlog_ticket_get(xlog_t *log, unit_bytes += 2*BBSIZE; } + atomic_set(&tic->t_ref, 1); tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; @@ -3323,7 +3334,7 @@ xlog_ticket_get(xlog_t *log, xlog_tic_reset_res(tic); return tic; -} /* xlog_ticket_get */ +} /****************************************************************************** diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d47b91f1082..8a3e84e900a 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -134,6 +134,7 @@ typedef struct xfs_log_callback { #ifdef __KERNEL__ /* Log manager interfaces */ struct xfs_mount; +struct xlog_ticket; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, xfs_log_ticket_t ticket, void **iclog, @@ -177,6 +178,9 @@ int xfs_log_need_covered(struct xfs_mount *mp); void xlog_iodone(struct xfs_buf *); +struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); +void xfs_log_ticket_put(struct xlog_ticket *ticket); + #endif diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index de7ef6ca920..b39a1980e82 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -245,6 +245,7 @@ typedef struct xlog_ticket { struct xlog_ticket *t_next; /* :4|8 */ struct xlog_ticket *t_prev; /* :4|8 */ xlog_tid_t t_tid; /* transaction identifier : 4 */ + atomic_t t_ref; /* ticket reference count : 4 */ int t_curr_res; /* current reservation in bytes : 4 */ int t_unit_res; /* unit reservation in bytes : 4 */ char t_ocnt; /* original count : 1 */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ad137efc870..8570b826fed 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -290,7 +290,7 @@ xfs_trans_dup( ASSERT(tp->t_ticket != NULL); ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); - ntp->t_ticket = tp->t_ticket; + ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; @@ -1259,6 +1259,13 @@ xfs_trans_roll( trans = *tpp; + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(trans->t_ticket); + + /* * Reserve space in the log for th next transaction. * This also pushes items in the "AIL", the list of logged items, diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 35d4d414bcc..771144932ab 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -172,6 +172,12 @@ xfs_dir_ialloc( *ipp = NULL; return code; } + + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); code = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c45ea278ef4..0574aadc4d3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1018,6 +1018,12 @@ xfs_inactive_symlink_rmt( ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error0; } + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); + /* * Remove the memory for extent descriptions (just bookkeeping). */ -- cgit v1.2.3 From bac8dca9f9b1dfcf9c4ecb4f9ca17185b828cc20 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:31 +1100 Subject: [XFS] fix NULL pointer dereference in xfs_log_force_umount xfs_log_force_umount may be called very early during log recovery where If we fail a buffer read in xlog_recover_do_inode_trans we abort the mount. But at that point log recovery has started delayed writeback of inode buffers. As part of the aborted mount we try to flush out all delwri buffers, but at that point we have already freed the superblock, and set mp->m_sb_bp to NULL, and xfs_log_force_umount which gets called after the inode buffer writeback trips over it. Make xfs_log_force_umount a little more careful when accessing mp->m_sb_bp to avoid this. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Niv Sardi --- fs/xfs/xfs_log.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4bf44aef644..8a5b05536a2 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3569,7 +3569,8 @@ xfs_log_force_umount( if (!log || log->l_flags & XLOG_ACTIVE_RECOVERY) { mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - XFS_BUF_DONE(mp->m_sb_bp); + if (mp->m_sb_bp) + XFS_BUF_DONE(mp->m_sb_bp); return 0; } @@ -3590,7 +3591,9 @@ xfs_log_force_umount( spin_lock(&log->l_icloglock); spin_lock(&log->l_grant_lock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - XFS_BUF_DONE(mp->m_sb_bp); + if (mp->m_sb_bp) + XFS_BUF_DONE(mp->m_sb_bp); + /* * This flag is sort of redundant because of the mount flag, but * it's good to maintain the separation between the log and the rest -- cgit v1.2.3 From f999a5bf3fa6b3d11334c3ba1e9dcfed5ff9f8a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:32 +1100 Subject: [XFS] wire up ->open for directories Currently there's no ->open method set for directories on XFS. That means we don't perform any check for opening too large directories without O_LARGEFILE, we don't check for shut down filesystems, and we don't actually do the readahead for the first block in the directory. Instead of just setting the directories open routine to xfs_file_open we merge the shutdown check directly into xfs_file_open and create a new xfs_dir_open that first calls xfs_file_open and then performs the readahead for block 0. (First sent on September 29th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_file.c | 34 +++++++++++++++++++++++++++++++--- fs/xfs/xfs_vnodeops.c | 22 ---------------------- fs/xfs/xfs_vnodeops.h | 1 - 3 files changed, 31 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 3fee790f138..72fc8d8c8bc 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_rw.h" #include "xfs_ioctl32.h" #include "xfs_vnodeops.h" +#include "xfs_da_btree.h" #include #include @@ -169,11 +170,37 @@ xfs_file_splice_write_invis( STATIC int xfs_file_open( struct inode *inode, - struct file *filp) + struct file *file) { - if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - return -xfs_open(XFS_I(inode)); + if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + return -EIO; + return 0; +} + +STATIC int +xfs_dir_open( + struct inode *inode, + struct file *file) +{ + struct xfs_inode *ip = XFS_I(inode); + int mode; + int error; + + error = xfs_file_open(inode, file); + if (error) + return error; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. + */ + mode = xfs_ilock_map_shared(ip); + if (ip->i_d.di_nextents > 0) + xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); + xfs_iunlock(ip, mode); + return 0; } STATIC int @@ -345,6 +372,7 @@ const struct file_operations xfs_invis_file_operations = { const struct file_operations xfs_dir_file_operations = { + .open = xfs_dir_open, .read = generic_read_dir, .readdir = xfs_file_readdir, .llseek = generic_file_llseek, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0574aadc4d3..c055bdb11cb 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -53,28 +53,6 @@ #include "xfs_filestream.h" #include "xfs_vnodeops.h" -int -xfs_open( - xfs_inode_t *ip) -{ - int mode; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return XFS_ERROR(EIO); - - /* - * If it's a directory with any blocks, read-ahead block 0 - * as we're almost certain to have the next operation be a read there. - */ - if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) { - mode = xfs_ilock_map_shared(ip); - if (ip->i_d.di_nextents > 0) - (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); - xfs_iunlock(ip, mode); - } - return 0; -} - int xfs_setattr( struct xfs_inode *ip, diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index b1ae8e3f404..a559400aeae 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -14,7 +14,6 @@ struct xfs_inode; struct xfs_iomap; -int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ -- cgit v1.2.3 From 6c31b93a14a453c8756ffd228e24910ffdf30c5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:32 +1100 Subject: [XFS] allow inode64 mount option on 32 bit systems Now that we've stopped using the Linux inode cache when can trivally support the inode64 mount option on 32bit architectures. As far as the kernel and most userspace is concerned this works perfectly, but applications still using really old stat and readdir interfaces will get an EOVERFLOW error when hitting an inode number not fitting into 32 bits (that problem of course also exists when using these applications on a 64bit kernel). Note that because inode64 is simply a mount option we can currently mount a filesystem having > 32 bit inode numbers and cause a variety of problems, all this is solved but this patch which enables XFS_BIG_INUMS, even when inode64 is not used. (First sent on October 18th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_linux.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 77d6ddcaf54..cfe16a36a1d 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -21,18 +21,12 @@ #include /* - * Some types are conditional depending on the target system. * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well - * as requiring XFS_BIG_BLKNOS to be set. + * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. */ #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) # define XFS_BIG_BLKNOS 1 -# if BITS_PER_LONG == 64 -# define XFS_BIG_INUMS 1 -# else -# define XFS_BIG_INUMS 0 -# endif +# define XFS_BIG_INUMS 1 #else # define XFS_BIG_BLKNOS 0 # define XFS_BIG_INUMS 0 -- cgit v1.2.3 From 65795910c1b798f8a47181b48cf6eb163a15e778 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:33 +1100 Subject: [XFS] fix spurious gcc warnings Some recent gcc warnings don't like passing string variables to printf-like functions without using at least a "%s" format string. Change the two occurances of that in xfs to please gcc. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_stats.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 64f4ec90b8b..c3526d445f6 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -61,7 +61,7 @@ xfs_read_xfsstats( /* Loop over all stats groups */ for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { - len += sprintf(buffer + len, xstats[i].desc); + len += sprintf(buffer + len, "%s", xstats[i].desc); /* inner loop does each group */ while (j < xstats[i].endpoint) { val = 0; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c3d004bc462..cc5e07e3e7a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1848,10 +1848,9 @@ STATIC int __init init_xfs_fs(void) { int error; - static char message[] __initdata = KERN_INFO \ - XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"; - printk(message); + printk(KERN_INFO XFS_VERSION_STRING " with " + XFS_BUILD_OPTIONS " enabled\n"); ktrace_init(64); vn_init(); -- cgit v1.2.3 From 2e6560929d8ab4b650fecc3a87013852b34f0922 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:33 +1100 Subject: [XFS] fix error inversion problems with data flushing XFS gets the sign of the error wrong in several places when gathering the error from generic linux functions. These functions return negative error values, while the core XFS code returns positive error values. Hence when XFS inverts the error to be returned to the VFS, it can incorrectly invert a negative error and this error will be ignored by the syscall return. Fix all the problems related to calling filemap_* functions. Problem initially identified by Nick Piggin in xfs_fsync(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_fs_subr.c | 23 ++++++++++++++++++++--- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- fs/xfs/linux-2.6/xfs_super.c | 13 +++++++++---- fs/xfs/xfs_vnodeops.c | 2 +- fs/xfs/xfs_vnodeops.h | 1 + 5 files changed, 32 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 36caa6d957d..5aeb7777696 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; } int fs_nosys(void) { return ENOSYS; } void fs_noval(void) { return; } +/* + * note: all filemap functions return negative error codes. These + * need to be inverted before returning to the xfs core functions. + */ void xfs_tosspages( xfs_inode_t *ip, @@ -53,7 +57,7 @@ xfs_flushinval_pages( if (!ret) truncate_inode_pages(mapping, first); } - return ret; + return -ret; } int @@ -72,10 +76,23 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = filemap_fdatawrite(mapping); if (flags & XFS_B_ASYNC) - return ret; + return -ret; ret2 = filemap_fdatawait(mapping); if (!ret) ret = ret2; } - return ret; + return -ret; +} + +int +xfs_wait_on_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return -filemap_fdatawait(mapping); + return 0; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 1957e5357d0..4959c874499 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -243,7 +243,7 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) - ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cc5e07e3e7a..ae92290e3c1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -990,21 +990,26 @@ xfs_fs_write_inode( struct inode *inode, int sync) { + struct xfs_inode *ip = XFS_I(inode); int error = 0; int flags = 0; - xfs_itrace_entry(XFS_I(inode)); + xfs_itrace_entry(ip); if (sync) { - filemap_fdatawait(inode->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); + if (error) + goto out_error; flags |= FLUSH_SYNC; } - error = xfs_inode_flush(XFS_I(inode), flags); + error = xfs_inode_flush(ip, flags); + +out_error: /* * if we failed to write out the inode then mark * it dirty again so we'll try again later. */ if (error) - xfs_mark_inode_dirty_sync(XFS_I(inode)); + xfs_mark_inode_dirty_sync(ip); return -error; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c055bdb11cb..f26b038004a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -681,7 +681,7 @@ xfs_fsync( return XFS_ERROR(EIO); /* capture size updates in I/O completion before writing the inode. */ - error = filemap_fdatawait(VFS_I(ip)->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); if (error) return XFS_ERROR(error); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index a559400aeae..2a45b00ad32 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -75,5 +75,6 @@ int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last, int fiopt); int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last, uint64_t flags, int fiopt); +int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); #endif /* _XFS_VNODEOPS_H */ -- cgit v1.2.3 From 0924b585fc49bf371bc700c23e516a538bf589af Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:34 +1100 Subject: [XFS] fix uninitialised variable bug in dquot release. gcc is warning about an uninitialised variable in xfs_growfs_rt(). This is a false positive. Fix it by changing the scope of the transaction pointer to wholly within the internal loop inside the function. While there, preemptively change xfs_growfs_rt_alloc() in the same way as it has exactly the same structure as xfs_growfs_rt() but gcc is not warning about it. Yet. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/xfs_rtalloc.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index e2f68de1615..f18b9b28179 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -85,7 +85,6 @@ xfs_growfs_rt_alloc( { xfs_fileoff_t bno; /* block number in file */ xfs_buf_t *bp; /* temporary buffer for zeroing */ - int cancelflags; /* flags for xfs_trans_cancel */ int committed; /* transaction committed flag */ xfs_daddr_t d; /* disk block address */ int error; /* error return value */ @@ -96,15 +95,16 @@ xfs_growfs_rt_alloc( xfs_bmbt_irec_t map; /* block map output */ int nmap; /* number of block maps */ int resblks; /* space reservation */ - xfs_trans_t *tp; /* transaction pointer */ /* * Allocate space to the file, as necessary. */ while (oblocks < nblocks) { + int cancelflags = 0; + xfs_trans_t *tp; + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); - cancelflags = 0; /* * Reserve space & log for one extent added to the file. */ @@ -171,7 +171,9 @@ xfs_growfs_rt_alloc( mp->m_bsize, 0); if (bp == NULL) { error = XFS_ERROR(EIO); - goto error_cancel; +error_cancel: + xfs_trans_cancel(tp, cancelflags); + goto error; } memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); @@ -188,8 +190,6 @@ xfs_growfs_rt_alloc( oblocks = map.br_startoff + map.br_blockcount; } return 0; -error_cancel: - xfs_trans_cancel(tp, cancelflags); error: return error; } @@ -1856,7 +1856,6 @@ xfs_growfs_rt( { xfs_rtblock_t bmbno; /* bitmap block number */ xfs_buf_t *bp; /* temporary buffer */ - int cancelflags; /* flags for xfs_trans_cancel */ int error; /* error return value */ xfs_inode_t *ip; /* bitmap inode, used as lock */ xfs_mount_t *nmp; /* new (fake) mount structure */ @@ -1872,10 +1871,8 @@ xfs_growfs_rt( xfs_extlen_t rsumblocks; /* current number of rt summary blks */ xfs_sb_t *sbp; /* old superblock */ xfs_fsblock_t sumbno; /* summary block number */ - xfs_trans_t *tp; /* transaction pointer */ sbp = &mp->m_sb; - cancelflags = 0; /* * Initial error checking. */ @@ -1942,6 +1939,9 @@ xfs_growfs_rt( ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); bmbno < nrbmblocks; bmbno++) { + xfs_trans_t *tp; + int cancelflags = 0; + *nmp = *mp; nsbp = &nmp->m_sb; /* @@ -1967,16 +1967,15 @@ xfs_growfs_rt( * Start a transaction, get the log reservation. */ tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); - cancelflags = 0; if ((error = xfs_trans_reserve(tp, 0, XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0))) - break; + goto error_cancel; /* * Lock out other callers by grabbing the bitmap inode lock. */ if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, XFS_ILOCK_EXCL, &ip))) - break; + goto error_cancel; ASSERT(ip == mp->m_rbmip); /* * Update the bitmap inode's size. @@ -1990,7 +1989,7 @@ xfs_growfs_rt( */ if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, XFS_ILOCK_EXCL, &ip))) - break; + goto error_cancel; ASSERT(ip == mp->m_rsumip); /* * Update the summary inode's size. @@ -2005,7 +2004,7 @@ xfs_growfs_rt( mp->m_rsumlevels != nmp->m_rsumlevels) { error = xfs_rtcopy_summary(mp, nmp, tp); if (error) - break; + goto error_cancel; } /* * Update superblock fields. @@ -2031,8 +2030,11 @@ xfs_growfs_rt( bp = NULL; error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); - if (error) + if (error) { +error_cancel: + xfs_trans_cancel(tp, cancelflags); break; + } /* * Mark more blocks free in the superblock. */ @@ -2045,15 +2047,10 @@ xfs_growfs_rt( mp->m_rsumsize = nrsumsize; error = xfs_trans_commit(tp, 0); - if (error) { - tp = NULL; + if (error) break; - } } - if (error && tp) - xfs_trans_cancel(tp, cancelflags); - /* * Free the fake mp structure. */ -- cgit v1.2.3 From 8a7141a8b931d60d42830432b82078cd6dace83b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2008 14:23:35 +1100 Subject: [XFS] convert xfs_getbmap to take formatter functions Preliminary work to hook up fiemap, this allows us to pass in an arbitrary formatter to copy extent data back to userspace. The formatter takes info for 1 extent, a pointer to the user "thing*" and a pointer to a "filled" variable to indicate whether a userspace buffer did get filled in (for fiemap, hole "extents" are skipped). I'm just using the getbmapx struct as a "common denominator" because as far as I can see, it holds all info that any formatters will care about. ("*thing" because fiemap doesn't pass the user pointer around, but rather has a pointer to a fiemap info structure, and helpers associated with it) Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_ioctl.c | 64 +++++++++++++++++++++++++++----------------- fs/xfs/xfs_bmap.c | 62 +++++++++++++++++------------------------- fs/xfs/xfs_bmap.h | 11 +++++--- fs/xfs/xfs_fs.h | 12 --------- 4 files changed, 71 insertions(+), 78 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index f1bd6c36e6f..d5970599953 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1249,6 +1249,19 @@ xfs_ioc_setxflags( return -xfs_ioctl_setattr(ip, &fa, mask); } +STATIC int +xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmap __user *base = *ap; + + /* copy only getbmap portion (not getbmapx) */ + if (copy_to_user(base, bmv, sizeof(struct getbmap))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmap); + return 0; +} + STATIC int xfs_ioc_getbmap( struct xfs_inode *ip, @@ -1256,37 +1269,48 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmap bm; - int iflags; + struct getbmapx bmx; int error; - if (copy_from_user(&bm, arg, sizeof(bm))) + if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) return -XFS_ERROR(EFAULT); - if (bm.bmv_count < 2) + if (bmx.bmv_count < 2) return -XFS_ERROR(EINVAL); - iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); if (ioflags & IO_INVIS) - iflags |= BMV_IF_NO_DMAPI_READ; + bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; - error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags); + error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, + (struct getbmap *)arg+1); if (error) return -error; - if (copy_to_user(arg, &bm, sizeof(bm))) + /* copy back header - only size of getbmap */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) return -XFS_ERROR(EFAULT); return 0; } +STATIC int +xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmapx __user *base = *ap; + + if (copy_to_user(base, bmv, sizeof(struct getbmapx))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmapx); + return 0; +} + STATIC int xfs_ioc_getbmapx( struct xfs_inode *ip, void __user *arg) { struct getbmapx bmx; - struct getbmap bm; - int iflags; int error; if (copy_from_user(&bmx, arg, sizeof(bmx))) @@ -1295,26 +1319,16 @@ xfs_ioc_getbmapx( if (bmx.bmv_count < 2) return -XFS_ERROR(EINVAL); - /* - * Map input getbmapx structure to a getbmap - * structure for xfs_getbmap. - */ - GETBMAP_CONVERT(bmx, bm); - - iflags = bmx.bmv_iflags; - - if (iflags & (~BMV_IF_VALID)) + if (bmx.bmv_iflags & (~BMV_IF_VALID)) return -XFS_ERROR(EINVAL); - iflags |= BMV_IF_EXTENDED; - - error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags); + error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, + (struct getbmapx *)arg+1); if (error) return -error; - GETBMAP_CONVERT(bm, bmx); - - if (copy_to_user(arg, &bmx, sizeof(bmx))) + /* copy back header */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) return -XFS_ERROR(EFAULT); return 0; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index c3912213645..8077580a719 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5811,9 +5811,9 @@ error0: STATIC int xfs_getbmapx_fix_eof_hole( xfs_inode_t *ip, /* xfs incore inode pointer */ - struct getbmap *out, /* output structure */ + struct getbmapx *out, /* output structure */ int prealloced, /* this is a file with - * preallocated data space */ + * preallocated data space */ __int64_t end, /* last block requested */ xfs_fsblock_t startblock) { @@ -5839,14 +5839,18 @@ xfs_getbmapx_fix_eof_hole( } /* - * Fcntl interface to xfs_bmapi. + * Get inode's extents as described in bmv, and format for output. + * Calls formatter to fill the user's buffer until all extents + * are mapped, until the passed-in bmv->bmv_count slots have + * been filled, or until the formatter short-circuits the loop, + * if it is tracking filled-in extents on its own. */ int /* error code */ xfs_getbmap( xfs_inode_t *ip, - struct getbmap *bmv, /* user bmap structure */ - void __user *ap, /* pointer to user's array */ - int interface) /* interface flags */ + struct getbmapx *bmv, /* user bmap structure */ + xfs_bmap_format_t formatter, /* format to user */ + void *arg) /* formatter arg */ { __int64_t bmvend; /* last block requested */ int error; /* return value */ @@ -5859,19 +5863,20 @@ xfs_getbmap( int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ - struct getbmap out; /* output structure */ + struct getbmapx out; /* output structure */ int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ int sh_unwritten; /* true, if unwritten */ /* extents listed separately */ + int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ - __int32_t oflags; /* getbmapx bmv_oflags field */ mp = ip->i_mount; + iflags = bmv->bmv_iflags; - whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - sh_unwritten = (interface & BMV_IF_PREALLOC) != 0; + whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; + sh_unwritten = (iflags & BMV_IF_PREALLOC) != 0; /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ @@ -5886,7 +5891,7 @@ xfs_getbmap( * could misinterpret holes in a DMAPI file as true holes, * when in fact they may represent offline user data. */ - if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && + if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_READ) && whichfork == XFS_DATA_FORK) { error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); @@ -5993,52 +5998,35 @@ xfs_getbmap( ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { - nexleft--; - oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? + out.bmv_oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? BMV_OF_PREALLOC : 0; out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); + out.bmv_unused1 = out.bmv_unused2 = 0; ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ goto unlock_and_return; } else { + int full = 0; /* user array is full */ + if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced, bmvend, map[i].br_startblock)) { goto unlock_and_return; } - /* return either getbmap/getbmapx structure. */ - if (interface & BMV_IF_EXTENDED) { - struct getbmapx outx; - - GETBMAP_CONVERT(out,outx); - outx.bmv_oflags = oflags; - outx.bmv_unused1 = outx.bmv_unused2 = 0; - if (copy_to_user(ap, &outx, - sizeof(outx))) { - error = XFS_ERROR(EFAULT); - goto unlock_and_return; - } - } else { - if (copy_to_user(ap, &out, - sizeof(out))) { - error = XFS_ERROR(EFAULT); - goto unlock_and_return; - } - } + /* format results & advance arg */ + error = formatter(&arg, &out, &full); + if (error || full) + goto unlock_and_return; + nexleft--; bmv->bmv_offset = out.bmv_offset + out.bmv_length; bmv->bmv_length = MAX((__int64_t)0, (__int64_t)(bmvend - bmv->bmv_offset)); bmv->bmv_entries++; - ap = (interface & BMV_IF_EXTENDED) ? - (void __user *) - ((struct getbmapx __user *)ap + 1) : - (void __user *) - ((struct getbmap __user *)ap + 1); } } } while (nmap && nexleft && bmv->bmv_length); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 7c9d12cd7a4..284571c05ed 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -356,15 +356,18 @@ xfs_bmap_finish( xfs_bmap_free_t *flist, /* i/o: list extents to free */ int *committed); /* xact committed or not */ +/* bmap to userspace formatter - copy to user & advance pointer */ +typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); + /* - * Fcntl interface to xfs_bmapi. + * Get inode's extents as described in bmv, and format for output. */ int /* error code */ xfs_getbmap( xfs_inode_t *ip, - struct getbmap *bmv, /* user bmap structure */ - void __user *ap, /* pointer to user's array */ - int iflags); /* interface flags */ + struct getbmapx *bmv, /* user bmap structure */ + xfs_bmap_format_t formatter, /* format to user */ + void *arg); /* formatter arg */ /* * Check if the endoff is outside the last extent. If so the caller will grow diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 01c0cc88d3f..df859d62a16 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -114,22 +114,10 @@ struct getbmapx { #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ #define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) -#ifdef __KERNEL__ -#define BMV_IF_EXTENDED 0x40000000 /* getpmapx if set */ -#endif /* bmv_oflags values - returned for for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ -/* Convert getbmap <-> getbmapx - move fields from p1 to p2. */ -#define GETBMAP_CONVERT(p1,p2) { \ - p2.bmv_offset = p1.bmv_offset; \ - p2.bmv_block = p1.bmv_block; \ - p2.bmv_length = p1.bmv_length; \ - p2.bmv_count = p1.bmv_count; \ - p2.bmv_entries = p1.bmv_entries; } - - /* * Structure for XFS_IOC_FSSETDM. * For use by backup and restore programs to set the XFS on-disk inode -- cgit v1.2.3 From 5af317c942aebc928ab244eb69581bd8e5333215 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2008 14:23:35 +1100 Subject: [XFS] Add new getbmap flags. This adds a new output flag, BMV_OF_LAST to indicate if we've hit the last extent in the inode. This potentially saves an extra call from userspace to see when the whole mapping is done. It also adds BMV_IF_DELALLOC and BMV_OF_DELALLOC to request, and indicate, delayed-allocation extents. In this case bmv_block is set to -2 (-1 was already taken for HOLESTARTBLOCK; unfortunately these are the reverse of the in-kernel constants.) These new flags facilitate addition of the new fiemap interface. Rather than adding sh_delalloc, remove sh_unwritten & just test the flags directly. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/xfs_bmap.c | 46 ++++++++++++++++++++++++++++++++-------------- fs/xfs/xfs_fs.h | 6 +++++- 2 files changed, 37 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8077580a719..138308e70d1 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5819,6 +5819,9 @@ xfs_getbmapx_fix_eof_hole( { __int64_t fixlen; xfs_mount_t *mp; /* file system mount point */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t lastx; /* last extent pointer */ + xfs_fileoff_t fileblock; if (startblock == HOLESTARTBLOCK) { mp = ip->i_mount; @@ -5832,7 +5835,15 @@ xfs_getbmapx_fix_eof_hole( out->bmv_length = fixlen; } } else { - out->bmv_block = XFS_FSB_TO_DB(ip, startblock); + if (startblock == DELAYSTARTBLOCK) + out->bmv_block = -2; + else + out->bmv_block = XFS_FSB_TO_DB(ip, startblock); + fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && + (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) + out->bmv_oflags |= BMV_OF_LAST; } return 1; @@ -5867,8 +5878,6 @@ xfs_getbmap( int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ - int sh_unwritten; /* true, if unwritten */ - /* extents listed separately */ int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ @@ -5876,7 +5885,6 @@ xfs_getbmap( iflags = bmv->bmv_iflags; whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - sh_unwritten = (iflags & BMV_IF_PREALLOC) != 0; /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ @@ -5947,8 +5955,9 @@ xfs_getbmap( xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (whichfork == XFS_DATA_FORK && - (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { + if (((iflags & BMV_IF_DELALLOC) == 0) && + (whichfork == XFS_DATA_FORK) && + (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ error = xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); @@ -5958,7 +5967,8 @@ xfs_getbmap( } } - ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); + ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || + ip->i_delayed_blks == 0); lock = xfs_ilock_map_shared(ip); @@ -5970,7 +5980,7 @@ xfs_getbmap( nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; bmapi_flags = XFS_BMAPI_AFLAG(whichfork) | - ((sh_unwritten) ? 0 : XFS_BMAPI_IGSTATE); + ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); /* * Allocate enough space to handle "subnex" maps at a time. @@ -5980,9 +5990,12 @@ xfs_getbmap( bmv->bmv_entries = 0; - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0) { - error = 0; - goto unlock_and_return; + if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { + if (((iflags & BMV_IF_DELALLOC) == 0) || + whichfork == XFS_ATTR_FORK) { + error = 0; + goto unlock_and_return; + } } nexleft = nex; @@ -5998,15 +6011,20 @@ xfs_getbmap( ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { - out.bmv_oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? - BMV_OF_PREALLOC : 0; + out.bmv_oflags = 0; + if (map[i].br_state == XFS_EXT_UNWRITTEN) + out.bmv_oflags |= BMV_OF_PREALLOC; + else if (map[i].br_startblock == DELAYSTARTBLOCK) + out.bmv_oflags |= BMV_OF_DELALLOC; out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); out.bmv_unused1 = out.bmv_unused2 = 0; - ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); + ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || + (map[i].br_startblock != DELAYSTARTBLOCK)); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ + out.bmv_oflags |= BMV_OF_LAST; goto unlock_and_return; } else { int full = 0; /* user array is full */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index df859d62a16..4ae03bae992 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -113,10 +113,14 @@ struct getbmapx { #define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ -#define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) +#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ +#define BMV_IF_VALID \ + (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) /* bmv_oflags values - returned for for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ +#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ +#define BMV_OF_LAST 0x4 /* segment is the last in the file */ /* * Structure for XFS_IOC_FSSETDM. -- cgit v1.2.3 From f35642e2f89f2b0379e929bd9027342365abc839 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2008 14:23:35 +1100 Subject: [XFS] Hook up the fiemap ioctl. This adds the fiemap inode_operation, which for us converts the fiemap values & flags into a getbmapx structure which can be sent to xfs_getbmap. The formatter then copies the bmv array back into the user's fiemap buffer via the fiemap helpers. If we wanted to be more clever, we could also return mapping data for in-inode attributes, but I'm not terribly motivated to do that just yet. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_iops.c | 84 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 69c98cf3233..2903faf6a26 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -53,6 +53,7 @@ #include #include #include +#include /* * Bring the atime in the XFS inode uptodate. @@ -661,6 +662,88 @@ out_error: return error; } +#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) + +/* + * Call fiemap helper to fill in user data. + * Returns positive errors to xfs_getbmap. + */ +STATIC int +xfs_fiemap_format( + void **arg, + struct getbmapx *bmv, + int *full) +{ + int error; + struct fiemap_extent_info *fieinfo = *arg; + u32 fiemap_flags = 0; + u64 logical, physical, length; + + /* Do nothing for a hole */ + if (bmv->bmv_block == -1LL) + return 0; + + logical = BBTOB(bmv->bmv_offset); + physical = BBTOB(bmv->bmv_block); + length = BBTOB(bmv->bmv_length); + + if (bmv->bmv_oflags & BMV_OF_PREALLOC) + fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; + else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { + fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + physical = 0; /* no block yet */ + } + if (bmv->bmv_oflags & BMV_OF_LAST) + fiemap_flags |= FIEMAP_EXTENT_LAST; + + error = fiemap_fill_next_extent(fieinfo, logical, physical, + length, fiemap_flags); + if (error > 0) { + error = 0; + *full = 1; /* user array now full */ + } + + return -error; +} + +STATIC int +xfs_vn_fiemap( + struct inode *inode, + struct fiemap_extent_info *fieinfo, + u64 start, + u64 length) +{ + xfs_inode_t *ip = XFS_I(inode); + struct getbmapx bm; + int error; + + error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); + if (error) + return error; + + /* Set up bmap header for xfs internal routine */ + bm.bmv_offset = BTOBB(start); + /* Special case for whole file */ + if (length == FIEMAP_MAX_OFFSET) + bm.bmv_length = -1LL; + else + bm.bmv_length = BTOBB(length); + + /* our formatter will tell xfs_getbmap when to stop. */ + bm.bmv_count = MAXEXTNUM; + bm.bmv_iflags = BMV_IF_PREALLOC; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) + bm.bmv_iflags |= BMV_IF_ATTRFORK; + if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) + bm.bmv_iflags |= BMV_IF_DELALLOC; + + error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); + if (error) + return -error; + + return 0; +} + static const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, .truncate = xfs_vn_truncate, @@ -671,6 +754,7 @@ static const struct inode_operations xfs_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .fallocate = xfs_vn_fallocate, + .fiemap = xfs_vn_fiemap, }; static const struct inode_operations xfs_dir_inode_operations = { -- cgit v1.2.3 From 00dd4029e9afa642c2b26dc3aac834322ac29b4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:36 +1100 Subject: [XFS] remove bhv_statvfs_t typedef Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_vfs.h | 2 -- fs/xfs/quota/xfs_qm_bhv.c | 4 ++-- fs/xfs/xfs_mount.h | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 0ab60bc2e76..8de5c4c95cb 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -31,8 +31,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_mount_args; -typedef struct kstatfs bhv_statvfs_t; - #define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ #define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ #define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 9556df9f7da..bc6c5cca3e1 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -50,7 +50,7 @@ STATIC void xfs_fill_statvfs_from_dquot( - bhv_statvfs_t *statp, + struct kstatfs *statp, xfs_disk_dquot_t *dp) { __uint64_t limit; @@ -87,7 +87,7 @@ xfs_fill_statvfs_from_dquot( STATIC void xfs_qm_statvfs( xfs_inode_t *ip, - bhv_statvfs_t *statp) + struct kstatfs *statp) { xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e3f618c84e4..d7031430252 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -134,7 +134,7 @@ typedef struct xfs_dquot * (*xfs_dqvopchown_t)( struct xfs_dquot **, struct xfs_dquot *); typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot *, struct xfs_dquot *, uint); -typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, bhv_statvfs_t *); +typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t); -- cgit v1.2.3 From 2b5decd09e9f98c4e361f97f3e32d80164774f75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:36 +1100 Subject: [XFS] remove xfs_vfs.h The only thing left are the forced shutdown flags and freeze macros which fit into xfs_mount.h much better. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_ioctl32.c | 1 - fs/xfs/linux-2.6/xfs_linux.h | 1 - fs/xfs/linux-2.6/xfs_vfs.h | 44 ------------------------------------------ fs/xfs/linux-2.6/xfs_vnode.h | 3 +++ fs/xfs/xfs_mount.h | 10 ++++++++++ 5 files changed, 13 insertions(+), 46 deletions(-) delete mode 100644 fs/xfs/linux-2.6/xfs_vfs.h (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index a4b254eb43b..6ce9c9c7cdd 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -36,7 +36,6 @@ #include "xfs_bmap_btree.h" #include "xfs_attr_sf.h" #include "xfs_dir2_sf.h" -#include "xfs_vfs.h" #include "xfs_vnode.h" #include "xfs_dinode.h" #include "xfs_inode.h" diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index cfe16a36a1d..507492d6dcc 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -80,7 +80,6 @@ #include #include -#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h deleted file mode 100644 index 8de5c4c95cb..00000000000 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VFS_H__ -#define __XFS_VFS_H__ - -#include -#include "xfs_fs.h" - -struct inode; - -struct fid; -struct cred; -struct seq_file; -struct super_block; -struct xfs_inode; -struct xfs_mount; -struct xfs_mount_args; - -#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ -#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ -#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ -#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ -#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ -#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ - -#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) -#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) - -#endif /* __XFS_VFS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index bf89e41c3b8..fb49e0f42d3 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -18,7 +18,10 @@ #ifndef __XFS_VNODE_H__ #define __XFS_VNODE_H__ +#include "xfs_fs.h" + struct file; +struct xfs_inode; struct xfs_iomap; struct attrlist_cursor_kern; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d7031430252..4fce22a8c35 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -428,6 +428,16 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, #define xfs_force_shutdown(m,f) \ xfs_do_force_shutdown(m, f, __FILE__, __LINE__) +#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ +#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ +#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ +#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ +#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ +#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ + +#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) +#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) + /* * Flags for xfs_mountfs */ -- cgit v1.2.3 From 207fcfad58482c7c9f92939a1f6df9f7e8873a34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] remove xfs_vfsops.h The only thing left is xfs_do_force_shutdown which already has a defintion in xfs_mount.h. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_export.c | 1 - fs/xfs/linux-2.6/xfs_super.c | 1 - fs/xfs/xfs_vfsops.c | 1 - fs/xfs/xfs_vfsops.h | 14 -------------- 4 files changed, 17 deletions(-) delete mode 100644 fs/xfs/xfs_vfsops.h (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 7f7abec25e1..595751f7835 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -29,7 +29,6 @@ #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_vfsops.h" /* * Note that we only accept fileids which are long enough rather than allow diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index ae92290e3c1..5389f077874 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -48,7 +48,6 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_vfsops.h" #include "xfs_version.h" #include "xfs_log_priv.h" #include "xfs_trans_priv.h" diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 305d9f3948e..cad3da36fb2 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -53,7 +53,6 @@ #include "xfs_filestream.h" #include "xfs_fsops.h" #include "xfs_vnodeops.h" -#include "xfs_vfsops.h" #include "xfs_utils.h" #include "xfs_sync.h" diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h deleted file mode 100644 index 6b8e0b52b95..00000000000 --- a/fs/xfs/xfs_vfsops.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _XFS_VFSOPS_H -#define _XFS_VFSOPS_H 1 - -struct cred; -struct xfs_fid; -struct inode; -struct kstatfs; -struct xfs_mount; -struct xfs_mount_args; - -void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, - int lnnum); - -#endif /* _XFS_VFSOPS_H */ -- cgit v1.2.3 From 26c5295135d10fc90cbf160adfda392d91f58279 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] remove i_gen from incore inode i_gen is incremented in directory operations when the directory is changed. It is never read or otherwise used so it should be removed to help reduce the size of the struct xfs_inode. The patch also removes a duplicate logging of the directory inode core. We only need to do this once per transaction so kill the one associated with the i_gen increment. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi --- fs/xfs/xfs_inode.h | 1 - fs/xfs/xfs_rename.c | 12 ++---------- fs/xfs/xfs_vnodeops.c | 29 ++--------------------------- 3 files changed, 4 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7f007ef4bbb..ea691c738f2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -261,7 +261,6 @@ typedef struct xfs_inode { unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ unsigned char i_update_size; /* di_size field is dirty */ - unsigned int i_gen; /* generation count */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d700dacdb10..02f0e8f53a9 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -367,19 +367,11 @@ xfs_rename( &first_block, &free_list, spaceres); if (error) goto abort_return; - xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Update the generation counts on all the directory inodes - * that we're modifying. - */ - src_dp->i_gen++; + xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); - - if (new_parent) { - target_dp->i_gen++; + if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - } /* * If this is a synchronous mount, make sure that the diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f26b038004a..b29a0eb9c0f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1599,8 +1599,6 @@ xfs_create( xfs_trans_set_sync(tp); } - dp->i_gen++; - /* * Attach the dquot(s) to the inodes and modify them incore. * These ids of the inode couldn't have changed since the new @@ -1967,13 +1965,6 @@ xfs_remove( } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Bump the in memory generation count on the parent - * directory so that other can know that it has changed. - */ - dp->i_gen++; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - if (is_dir) { /* * Drop the link from ip's "..". @@ -1991,8 +1982,8 @@ xfs_remove( } else { /* * When removing a non-directory we need to log the parent - * inode here for the i_gen update. For a directory this is - * done implicitly by the xfs_droplink call for the ".." entry. + * inode here. For a directory this is done implicitly + * by the xfs_droplink call for the ".." entry. */ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } @@ -2152,7 +2143,6 @@ xfs_link( if (error) goto abort_return; xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - tdp->i_gen++; xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); @@ -2329,18 +2319,10 @@ xfs_mkdir( } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Bump the in memory version number of the parent directory - * so that other processes accessing it will recognize that - * the directory has changed. - */ - dp->i_gen++; - error = xfs_dir_init(tp, cdp, dp); if (error) goto error2; - cdp->i_gen = 1; error = xfs_bumplink(tp, dp); if (error) goto error2; @@ -2626,13 +2608,6 @@ xfs_symlink( xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - /* - * Bump the in memory version number of the parent directory - * so that other processes accessing it will recognize that - * the directory has changed. - */ - dp->i_gen++; - /* * If this is a synchronous mount, make sure that the * symlink transaction goes to disk before returning to -- cgit v1.2.3 From 5e1be0fb1a3950597aeda448698e85b0595a2e92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] factor out xfs_read_agi helper Add a helper to read the AGI header and perform basic verification. Based on hunks from a larger patch from Dave Chinner. (First sent on Juli 23rd) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_ag.h | 3 ++ fs/xfs/xfs_ialloc.c | 101 +++++++++++++++++++++++++++++------------------ fs/xfs/xfs_inode.c | 57 +++----------------------- fs/xfs/xfs_log_recover.c | 72 ++++++++++++--------------------- 4 files changed, 98 insertions(+), 135 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 2bfd8632914..f4a315390c6 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -142,6 +142,9 @@ typedef struct xfs_agi { #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) +extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, struct xfs_buf **bpp); + /* * The third a.g. block contains the a.g. freelist, an array * of block pointers to blocks owned by the allocation btree code. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8a56c52964..efb65fea203 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1462,70 +1462,95 @@ xfs_ialloc_log_agi( xfs_trans_log_buf(tp, bp, first, last); } +#ifdef DEBUG +STATIC void +xfs_check_agi_unlinked( + struct xfs_agi *agi) +{ + int i; + + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) + ASSERT(agi->agi_unlinked[i]); +} +#else +#define xfs_check_agi_unlinked(agi) +#endif + /* * Read in the allocation group header (inode allocation section) */ int -xfs_ialloc_read_agi( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_buf_t **bpp) /* allocation group hdr buf */ +xfs_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp) /* allocation group hdr buf */ { - xfs_agi_t *agi; /* allocation group header */ - int agi_ok; /* agi is consistent */ - xfs_buf_t *bp; /* allocation group hdr buf */ - xfs_perag_t *pag; /* per allocation group data */ - int error; + struct xfs_agi *agi; /* allocation group header */ + int agi_ok; /* agi is consistent */ + int error; ASSERT(agno != NULLAGNUMBER); - error = xfs_trans_read_buf( - mp, tp, mp->m_ddev_targp, + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &bp); + XFS_FSS_TO_BB(mp, 1), 0, bpp); if (error) return error; - ASSERT(bp && !XFS_BUF_GETERROR(bp)); + + ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); + agi = XFS_BUF_TO_AGI(*bpp); /* * Validate the magic number of the agi block. */ - agi = XFS_BUF_TO_AGI(bp); - agi_ok = - be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); + agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && + XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && + be32_to_cpu(agi->agi_seqno) == agno; if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, XFS_RANDOM_IALLOC_READ_AGI))) { - XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW, + XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, mp, agi); - xfs_trans_brelse(tp, bp); + xfs_trans_brelse(tp, *bpp); return XFS_ERROR(EFSCORRUPTED); } + + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); + + xfs_check_agi_unlinked(agi); + return 0; +} + +int +xfs_ialloc_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp) /* allocation group hdr buf */ +{ + struct xfs_agi *agi; /* allocation group header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + error = xfs_read_agi(mp, tp, agno, bpp); + if (error) + return error; + + agi = XFS_BUF_TO_AGI(*bpp); pag = &mp->m_perag[agno]; + if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); pag->pagi_init = 1; - } else { - /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. - */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } - -#ifdef DEBUG - { - int i; - - for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) - ASSERT(agi->agi_unlinked[i]); } -#endif - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF); - *bpp = bp; + /* + * It's possible for these to be out of sync if + * we are in the middle of a forced shutdown. + */ + ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || + XFS_FORCED_SHUTDOWN(mp)); return 0; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9771004706..46b0526acd4 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1843,13 +1843,10 @@ xfs_iunlink( xfs_dinode_t *dip; xfs_buf_t *agibp; xfs_buf_t *ibp; - xfs_agnumber_t agno; - xfs_daddr_t agdaddr; xfs_agino_t agino; short bucket_index; int offset; int error; - int agi_ok; ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_mode != 0); @@ -1857,31 +1854,15 @@ xfs_iunlink( mp = tp->t_mountp; - agno = XFS_INO_TO_AGNO(mp, ip->i_ino); - agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); - /* * Get the agi buffer first. It ensures lock ordering * on the list. */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, - XFS_FSS_TO_BB(mp, 1), 0, &agibp); + error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); if (error) return error; - - /* - * Validate the magic number of the agi block. - */ agi = XFS_BUF_TO_AGI(agibp); - agi_ok = - be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK, - XFS_RANDOM_IUNLINK))) { - XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi); - xfs_trans_brelse(tp, agibp); - return XFS_ERROR(EFSCORRUPTED); - } + /* * Get the index into the agi hash table for the * list this inode will go on. @@ -1941,7 +1922,6 @@ xfs_iunlink_remove( xfs_buf_t *agibp; xfs_buf_t *ibp; xfs_agnumber_t agno; - xfs_daddr_t agdaddr; xfs_agino_t agino; xfs_agino_t next_agino; xfs_buf_t *last_ibp; @@ -1949,45 +1929,20 @@ xfs_iunlink_remove( short bucket_index; int offset, last_offset = 0; int error; - int agi_ok; - /* - * First pull the on-disk inode from the AGI unlinked list. - */ mp = tp->t_mountp; - agno = XFS_INO_TO_AGNO(mp, ip->i_ino); - agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); /* * Get the agi buffer first. It ensures lock ordering * on the list. */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, - XFS_FSS_TO_BB(mp, 1), 0, &agibp); - if (error) { - cmn_err(CE_WARN, - "xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. Returning error.", - error, mp->m_fsname); + error = xfs_read_agi(mp, tp, agno, &agibp); + if (error) return error; - } - /* - * Validate the magic number of the agi block. - */ + agi = XFS_BUF_TO_AGI(agibp); - agi_ok = - be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE, - XFS_RANDOM_IUNLINK_REMOVE))) { - XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW, - mp, agi); - xfs_trans_brelse(tp, agibp); - cmn_err(CE_WARN, - "xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.", - mp->m_fsname); - return XFS_ERROR(EFSCORRUPTED); - } + /* * Get the index into the agi hash table for the * list this inode will go on. diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b411d494731..b552676ca5c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3117,19 +3117,16 @@ xlog_recover_clear_agi_bucket( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); - error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); - if (!error) - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &agibp); + error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), + 0, 0, 0); if (error) goto out_abort; - error = EINVAL; - agi = XFS_BUF_TO_AGI(agibp); - if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) + error = xfs_read_agi(mp, tp, agno, &agibp); + if (error) goto out_abort; + agi = XFS_BUF_TO_AGI(agibp); agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); @@ -3190,16 +3187,17 @@ xlog_recover_process_iunlinks( /* * Find the agi for this ag. */ - agibp = xfs_buf_read(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)", - log->l_mp, agibp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp))); + error = xfs_read_agi(mp, NULL, agno, &agibp); + if (error) { + /* + * AGI is b0rked. Don't process it. + * + * We should probably mark the filesystem as corrupt + * after we've recovered all the ag's we can.... + */ + continue; } agi = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum)); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { @@ -3278,22 +3276,12 @@ xlog_recover_process_iunlinks( /* * Reacquire the agibuffer and continue around - * the loop. + * the loop. This should never fail as we know + * the buffer was good earlier on. */ - agibp = xfs_buf_read(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, - XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert( - "xlog_recover_process_iunlinks(#2)", - log->l_mp, agibp, - XFS_AG_DADDR(mp, agno, - XFS_AGI_DADDR(mp))); - } + error = xfs_read_agi(mp, NULL, agno, &agibp); + ASSERT(error == 0); agi = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu( - agi->agi_magicnum)); } } @@ -3980,11 +3968,9 @@ xlog_recover_check_summary( { xfs_mount_t *mp; xfs_agf_t *agfp; - xfs_agi_t *agip; xfs_buf_t *agfbp; xfs_buf_t *agibp; xfs_daddr_t agfdaddr; - xfs_daddr_t agidaddr; xfs_buf_t *sbbp; #ifdef XFS_LOUD_RECOVERY xfs_sb_t *sbp; @@ -3993,6 +3979,7 @@ xlog_recover_check_summary( __uint64_t freeblks; __uint64_t itotal; __uint64_t ifree; + int error; mp = log->l_mp; @@ -4016,21 +4003,14 @@ xlog_recover_check_summary( be32_to_cpu(agfp->agf_flcount); xfs_buf_relse(agfbp); - agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); - agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr, - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert("xlog_recover_check_summary(agi)", - mp, agibp, agidaddr); - } - agip = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum)); - ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum))); - ASSERT(be32_to_cpu(agip->agi_seqno) == agno); + error = xfs_read_agi(mp, NULL, agno, &agibp); + if (!error) { + struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); - itotal += be32_to_cpu(agip->agi_count); - ifree += be32_to_cpu(agip->agi_freecount); - xfs_buf_relse(agibp); + itotal += be32_to_cpu(agi->agi_count); + ifree += be32_to_cpu(agi->agi_freecount); + xfs_buf_relse(agibp); + } } sbbp = xfs_getsb(mp, 0); -- cgit v1.2.3 From 4805621a37d9b2b16641b5c68597651419e9e252 Mon Sep 17 00:00:00 2001 From: "From: Christoph Hellwig" Date: Fri, 28 Nov 2008 14:23:38 +1100 Subject: [XFS] factor out xfs_read_agf helper Add a helper to read the AGF header and perform basic verification. Based on hunks from a larger patch from Dave Chinner. (First sent on Juli 23rd) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_ag.h | 2 ++ fs/xfs/xfs_alloc.c | 69 +++++++++++++++++++++++++++++++++--------------- fs/xfs/xfs_log_recover.c | 26 ++++++++---------- 3 files changed, 61 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f4a315390c6..f2e21817a22 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -91,6 +91,8 @@ typedef struct xfs_agf { #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) +extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); /* * Size of the unlinked inode hash table in the agi. diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index c47ce907572..028e44e58ea 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2233,44 +2233,41 @@ xfs_alloc_put_freelist( * Read in the allocation group header (free/alloc section). */ int /* error */ -xfs_alloc_read_agf( - xfs_mount_t *mp, /* mount point structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_ALLOC_FLAG_... */ - xfs_buf_t **bpp) /* buffer for the ag freelist header */ +xfs_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_BUF_ */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ { - xfs_agf_t *agf; /* ag freelist header */ + struct xfs_agf *agf; /* ag freelist header */ int agf_ok; /* set if agf is consistent */ - xfs_buf_t *bp; /* return value */ - xfs_perag_t *pag; /* per allocation group data */ int error; ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U, - &bp); + XFS_FSS_TO_BB(mp, 1), flags, bpp); if (error) return error; - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (!bp) { - *bpp = NULL; + if (!*bpp) return 0; - } + + ASSERT(!XFS_BUF_GETERROR(*bpp)); + agf = XFS_BUF_TO_AGF(*bpp); + /* * Validate the magic number of the agf block. */ - agf = XFS_BUF_TO_AGF(bp); agf_ok = be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); + be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_seqno) == agno; if (xfs_sb_version_haslazysbcount(&mp->m_sb)) agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= be32_to_cpu(agf->agf_length); @@ -2278,9 +2275,41 @@ xfs_alloc_read_agf( XFS_RANDOM_ALLOC_READ_AGF))) { XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", XFS_ERRLEVEL_LOW, mp, agf); - xfs_trans_brelse(tp, bp); + xfs_trans_brelse(tp, *bpp); return XFS_ERROR(EFSCORRUPTED); } + + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); + return 0; +} + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_alloc_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ +{ + struct xfs_agf *agf; /* ag freelist header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + ASSERT(agno != NULLAGNUMBER); + + error = xfs_read_agf(mp, tp, agno, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0, + bpp); + if (error) + return error; + if (!*bpp) + return 0; + ASSERT(!XFS_BUF_GETERROR(*bpp)); + + agf = XFS_BUF_TO_AGF(*bpp); pag = &mp->m_perag[agno]; if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); @@ -2308,8 +2337,6 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF); - *bpp = bp; return 0; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b552676ca5c..d94987915d5 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3970,7 +3970,6 @@ xlog_recover_check_summary( xfs_agf_t *agfp; xfs_buf_t *agfbp; xfs_buf_t *agibp; - xfs_daddr_t agfdaddr; xfs_buf_t *sbbp; #ifdef XFS_LOUD_RECOVERY xfs_sb_t *sbp; @@ -3987,21 +3986,18 @@ xlog_recover_check_summary( itotal = 0LL; ifree = 0LL; for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)); - agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr, - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agfbp)) { - xfs_ioerror_alert("xlog_recover_check_summary(agf)", - mp, agfbp, agfdaddr); + error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, + "xlog_recover_check_summary(agf)" + "agf read failed agno %d error %d", + agno, error); + } else { + agfp = XFS_BUF_TO_AGF(agfbp); + freeblks += be32_to_cpu(agfp->agf_freeblks) + + be32_to_cpu(agfp->agf_flcount); + xfs_buf_relse(agfbp); } - agfp = XFS_BUF_TO_AGF(agfbp); - ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum)); - ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum))); - ASSERT(be32_to_cpu(agfp->agf_seqno) == agno); - - freeblks += be32_to_cpu(agfp->agf_freeblks) + - be32_to_cpu(agfp->agf_flcount); - xfs_buf_relse(agfbp); error = xfs_read_agi(mp, NULL, agno, &agibp); if (!error) { -- cgit v1.2.3 From b28708d6a0a3ed65a68f0dcd8e6d1c09f14e5cf3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:38 +1100 Subject: [XFS] sanitize xlog_in_core_t definition Move all fields from xlog_iclog_fields_t into xlog_in_core_t instead of having them in a substructure and the using #defines to make it look like they were directly in xlog_in_core_t. Also document that xlog_in_core_2_t is grossly misnamed, and make all references to it typesafe. (First sent on Semptember 15th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_log.c | 12 +++--------- fs/xfs/xfs_log_priv.h | 46 +++++++++++++--------------------------------- fs/xfs/xfs_log_recover.c | 7 +++---- 3 files changed, 19 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 8a5b05536a2..aadaa1472f6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1029,12 +1029,6 @@ xlog_iodone(xfs_buf_t *bp) ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); aborted = 0; - - /* - * Some versions of cpp barf on the recursive definition of - * ic_log -> hic_fields.ic_log and expand ic_log twice when - * it is passed through two macros. Workaround broken cpp. - */ l = iclog->ic_log; /* @@ -1301,7 +1295,7 @@ xlog_alloc_log(xfs_mount_t *mp, XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; - iclog->hic_data = bp->b_addr; + iclog->ic_data = bp->b_addr; #ifdef DEBUG log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); #endif @@ -1321,7 +1315,7 @@ xlog_alloc_log(xfs_mount_t *mp, atomic_set(&iclog->ic_refcnt, 0); spin_lock_init(&iclog->ic_callback_lock); iclog->ic_callback_tail = &(iclog->ic_callback); - iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; + iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); @@ -3463,7 +3457,7 @@ xlog_verify_iclog(xlog_t *log, ptr = iclog->ic_datap; base_ptr = ptr; ophead = (xlog_op_header_t *)ptr; - xhdr = (xlog_in_core_2_t *)&iclog->ic_header; + xhdr = iclog->ic_data; for (i = 0; i < len; i++) { ophead = (xlog_op_header_t *)ptr; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b39a1980e82..654167be0ef 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -310,6 +310,16 @@ typedef struct xlog_rec_ext_header { } xlog_rec_ext_header_t; #ifdef __KERNEL__ + +/* + * Quite misnamed, because this union lays out the actual on-disk log buffer. + */ +typedef union xlog_in_core2 { + xlog_rec_header_t hic_header; + xlog_rec_ext_header_t hic_xheader; + char hic_sector[XLOG_HEADER_SIZE]; +} xlog_in_core_2_t; + /* * - A log record header is 512 bytes. There is plenty of room to grow the * xlog_rec_header_t into the reserved space. @@ -339,7 +349,7 @@ typedef struct xlog_rec_ext_header { * We'll put all the read-only and l_icloglock fields in the first cacheline, * and move everything else out to subsequent cachelines. */ -typedef struct xlog_iclog_fields { +typedef struct xlog_in_core { sv_t ic_force_wait; sv_t ic_write_wait; struct xlog_in_core *ic_next; @@ -362,40 +372,10 @@ typedef struct xlog_iclog_fields { /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; -} xlog_iclog_fields_t; - -typedef union xlog_in_core2 { - xlog_rec_header_t hic_header; - xlog_rec_ext_header_t hic_xheader; - char hic_sector[XLOG_HEADER_SIZE]; -} xlog_in_core_2_t; - -typedef struct xlog_in_core { - xlog_iclog_fields_t hic_fields; - xlog_in_core_2_t *hic_data; + xlog_in_core_2_t *ic_data; +#define ic_header ic_data->hic_header } xlog_in_core_t; -/* - * Defines to save our code from this glop. - */ -#define ic_force_wait hic_fields.ic_force_wait -#define ic_write_wait hic_fields.ic_write_wait -#define ic_next hic_fields.ic_next -#define ic_prev hic_fields.ic_prev -#define ic_bp hic_fields.ic_bp -#define ic_log hic_fields.ic_log -#define ic_callback hic_fields.ic_callback -#define ic_callback_lock hic_fields.ic_callback_lock -#define ic_callback_tail hic_fields.ic_callback_tail -#define ic_trace hic_fields.ic_trace -#define ic_size hic_fields.ic_size -#define ic_offset hic_fields.ic_offset -#define ic_refcnt hic_fields.ic_refcnt -#define ic_bwritecnt hic_fields.ic_bwritecnt -#define ic_state hic_fields.ic_state -#define ic_datap hic_fields.ic_datap -#define ic_header hic_data->hic_header - /* * The reservation head lsn is not made up of a cycle number and block number. * Instead, it uses a cycle number and byte number. Logs don't expect to diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d94987915d5..9abb96a7674 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3332,7 +3332,6 @@ xlog_pack_data( int size = iclog->ic_offset + roundoff; __be32 cycle_lsn; xfs_caddr_t dp; - xlog_in_core_2_t *xhdr; xlog_pack_data_checksum(log, iclog, size); @@ -3347,7 +3346,8 @@ xlog_pack_data( } if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - xhdr = (xlog_in_core_2_t *)&iclog->ic_header; + xlog_in_core_2_t *xhdr = iclog->ic_data; + for ( ; i < BTOBB(size); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3405,7 +3405,6 @@ xlog_unpack_data( xlog_t *log) { int i, j, k; - xlog_in_core_2_t *xhdr; for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { @@ -3414,7 +3413,7 @@ xlog_unpack_data( } if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - xhdr = (xlog_in_core_2_t *)rhead; + xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); -- cgit v1.2.3 From d42f08f61c5e7f0ed4c6b6df4c9987ddb85ec66e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:38 +1100 Subject: [XFS] kill xfs_ialloc_log_di xfs_ialloc_log_di is only used to log the full inode core + di_next_unlinked. That means all the offset magic is not nessecary and we can simply use xfs_trans_log_buf directly. Also add a comment describing what we should do here instead. (First sent on October 7th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_dinode.h | 34 ------------------------ fs/xfs/xfs_ialloc.c | 75 +++++++---------------------------------------------- 2 files changed, 10 insertions(+), 99 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index d7cf392cc85..10f9204b81e 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -105,40 +105,6 @@ typedef struct xfs_dinode #define XFS_MAXLINK ((1U << 31) - 1U) #define XFS_MAXLINK_1 65535U -/* - * Bit names for logging disk inodes only - */ -#define XFS_DI_MAGIC 0x0000001 -#define XFS_DI_MODE 0x0000002 -#define XFS_DI_VERSION 0x0000004 -#define XFS_DI_FORMAT 0x0000008 -#define XFS_DI_ONLINK 0x0000010 -#define XFS_DI_UID 0x0000020 -#define XFS_DI_GID 0x0000040 -#define XFS_DI_NLINK 0x0000080 -#define XFS_DI_PROJID 0x0000100 -#define XFS_DI_PAD 0x0000200 -#define XFS_DI_ATIME 0x0000400 -#define XFS_DI_MTIME 0x0000800 -#define XFS_DI_CTIME 0x0001000 -#define XFS_DI_SIZE 0x0002000 -#define XFS_DI_NBLOCKS 0x0004000 -#define XFS_DI_EXTSIZE 0x0008000 -#define XFS_DI_NEXTENTS 0x0010000 -#define XFS_DI_NAEXTENTS 0x0020000 -#define XFS_DI_FORKOFF 0x0040000 -#define XFS_DI_AFORMAT 0x0080000 -#define XFS_DI_DMEVMASK 0x0100000 -#define XFS_DI_DMSTATE 0x0200000 -#define XFS_DI_FLAGS 0x0400000 -#define XFS_DI_GEN 0x0800000 -#define XFS_DI_NEXT_UNLINKED 0x1000000 -#define XFS_DI_U 0x2000000 -#define XFS_DI_A 0x4000000 -#define XFS_DI_NUM_BITS 27 -#define XFS_DI_ALL_BITS ((1 << XFS_DI_NUM_BITS) - 1) -#define XFS_DI_CORE_BITS (XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A)) - /* * Values for di_format */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index efb65fea203..86d463ee7b9 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -41,68 +41,6 @@ #include "xfs_error.h" #include "xfs_bmap.h" -/* - * Log specified fields for the inode given by bp and off. - */ -STATIC void -xfs_ialloc_log_di( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* inode buffer */ - int off, /* index of inode in buffer */ - int fields) /* bitmask of fields to log */ -{ - int first; /* first byte number */ - int ioffset; /* off in bytes */ - int last; /* last byte number */ - xfs_mount_t *mp; /* mount point structure */ - static const short offsets[] = { /* field offsets */ - /* keep in sync with bits */ - offsetof(xfs_dinode_core_t, di_magic), - offsetof(xfs_dinode_core_t, di_mode), - offsetof(xfs_dinode_core_t, di_version), - offsetof(xfs_dinode_core_t, di_format), - offsetof(xfs_dinode_core_t, di_onlink), - offsetof(xfs_dinode_core_t, di_uid), - offsetof(xfs_dinode_core_t, di_gid), - offsetof(xfs_dinode_core_t, di_nlink), - offsetof(xfs_dinode_core_t, di_projid), - offsetof(xfs_dinode_core_t, di_pad), - offsetof(xfs_dinode_core_t, di_atime), - offsetof(xfs_dinode_core_t, di_mtime), - offsetof(xfs_dinode_core_t, di_ctime), - offsetof(xfs_dinode_core_t, di_size), - offsetof(xfs_dinode_core_t, di_nblocks), - offsetof(xfs_dinode_core_t, di_extsize), - offsetof(xfs_dinode_core_t, di_nextents), - offsetof(xfs_dinode_core_t, di_anextents), - offsetof(xfs_dinode_core_t, di_forkoff), - offsetof(xfs_dinode_core_t, di_aformat), - offsetof(xfs_dinode_core_t, di_dmevmask), - offsetof(xfs_dinode_core_t, di_dmstate), - offsetof(xfs_dinode_core_t, di_flags), - offsetof(xfs_dinode_core_t, di_gen), - offsetof(xfs_dinode_t, di_next_unlinked), - offsetof(xfs_dinode_t, di_u), - offsetof(xfs_dinode_t, di_a), - sizeof(xfs_dinode_t) - }; - - - ASSERT(offsetof(xfs_dinode_t, di_core) == 0); - ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0); - mp = tp->t_mountp; - /* - * Get the inode-relative first and last bytes for these fields - */ - xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last); - /* - * Convert to buffer offsets and log it. - */ - ioffset = off << mp->m_sb.sb_inodelog; - first += ioffset; - last += ioffset; - xfs_trans_log_buf(tp, bp, first, last); -} /* * Allocation group level functions. @@ -406,18 +344,25 @@ xfs_ialloc_ag_alloc( XFS_BUF_LOCK); ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); + /* - * Set initial values for the inodes in this buffer. + * Initialize all inodes in this buffer and then log them. + * + * XXX: It would be much better if we had just one transaction to + * log a whole cluster of inodes instead of all the indivdual + * transactions causing a lot of log traffic. */ xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { + int ioffset = i << args.mp->m_sb.sb_inodelog; + uint isize = sizeof(xfs_dinode_t) + sizeof(__be32); + free = XFS_MAKE_IPTR(args.mp, fbuf, i); free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); free->di_core.di_version = version; free->di_core.di_gen = cpu_to_be32(gen); free->di_next_unlinked = cpu_to_be32(NULLAGINO); - xfs_ialloc_log_di(tp, fbuf, i, - XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); + xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } xfs_trans_inode_alloc_buf(tp, fbuf); } -- cgit v1.2.3 From 81591fe2db19d0fc1ec2aaaa6a790a5ab97ac3ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:39 +1100 Subject: [XFS] kill xfs_dinode_core_t Now that we have a separate xfs_icdinode_t for the in-core inode which gets logged there is no need anymore for the xfs_dinode vs xfs_dinode_core split - the fact that part of the structure gets logged through the inode log item and a small part not can better be described in a comment. All sizeof operations on the dinode_core either really wanted the icdinode and are switched to that one, or had already added the size of the agi unlinked list pointer. Later both will be replaced with helpers once we get the larger CRC-enabled dinode. Removing the data and attribute fork unions also has the advantage that xfs_dinode.h doesn't need to pull in every header under the sun. While we're at it also add some more comments describing the dinode structure. (First sent on October 7th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_dinode.h | 104 +++++++++++++++++++++++------------------------ fs/xfs/xfs_dir2_sf.h | 7 ---- fs/xfs/xfs_ialloc.c | 8 ++-- fs/xfs/xfs_inode.c | 85 +++++++++++++++++++------------------- fs/xfs/xfs_inode.h | 7 ++-- fs/xfs/xfs_inode_item.c | 2 +- fs/xfs/xfs_itable.c | 20 ++++----- fs/xfs/xfs_log_recover.c | 29 ++++++------- fs/xfs/xfs_mount.c | 3 +- 9 files changed, 124 insertions(+), 141 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 10f9204b81e..0b7ebf922af 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -18,32 +18,32 @@ #ifndef __XFS_DINODE_H__ #define __XFS_DINODE_H__ -struct xfs_buf; -struct xfs_mount; - #define XFS_DINODE_VERSION_1 1 #define XFS_DINODE_VERSION_2 2 #define XFS_DINODE_GOOD_VERSION(v) \ (((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2)) #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -/* - * Disk inode structure. - * This is just the header; the inode is expanded to fill a variable size - * with the last field expanding. It is split into the core and "other" - * because we only need the core part in the in-core inode. - */ typedef struct xfs_timestamp { __be32 t_sec; /* timestamp seconds */ __be32 t_nsec; /* timestamp nanoseconds */ } xfs_timestamp_t; /* - * Note: Coordinate changes to this structure with the XFS_DI_* #defines - * below, the offsets table in xfs_ialloc_log_di() and struct xfs_icdinode - * in xfs_inode.h. + * On-disk inode structure. + * + * This is just the header or "dinode core", the inode is expanded to fill a + * variable size the leftover area split into a data and an attribute fork. + * The format of the data and attribute fork depends on the format of the + * inode as indicated by di_format and di_aformat. To access the data and + * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros + * below. + * + * There is a very similar struct icdinode in xfs_inode which matches the + * layout of the first 96 bytes of this structure, but is kept in native + * format instead of big endian. */ -typedef struct xfs_dinode_core { +typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ @@ -69,33 +69,12 @@ typedef struct xfs_dinode_core { __be16 di_dmstate; /* DMIG state info */ __be16 di_flags; /* random flags, XFS_DIFLAG_... */ __be32 di_gen; /* generation number */ -} xfs_dinode_core_t; -#define DI_MAX_FLUSH 0xffff + /* di_next_unlinked is the only non-core field in the old dinode */ + __be32 di_next_unlinked;/* agi unlinked list ptr */ +} __attribute__((packed)) xfs_dinode_t; -typedef struct xfs_dinode -{ - xfs_dinode_core_t di_core; - /* - * In adding anything between the core and the union, be - * sure to update the macros like XFS_LITINO below. - */ - __be32 di_next_unlinked;/* agi unlinked list ptr */ - union { - xfs_bmdr_block_t di_bmbt; /* btree root block */ - xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */ - xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */ - char di_c[1]; /* local contents */ - __be32 di_dev; /* device for S_IFCHR/S_IFBLK */ - uuid_t di_muuid; /* mount point value */ - char di_symlink[1]; /* local symbolic link */ - } di_u; - union { - xfs_bmdr_block_t di_abmbt; /* btree root block */ - xfs_bmbt_rec_32_t di_abmx[1]; /* extent list */ - xfs_attr_shortform_t di_attrsf; /* shortform attribute list */ - } di_a; -} xfs_dinode_t; +#define DI_MAX_FLUSH 0xffff /* * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. @@ -108,14 +87,12 @@ typedef struct xfs_dinode /* * Values for di_format */ -typedef enum xfs_dinode_fmt -{ - XFS_DINODE_FMT_DEV, /* CHR, BLK: di_dev */ - XFS_DINODE_FMT_LOCAL, /* DIR, REG: di_c */ - /* LNK: di_symlink */ - XFS_DINODE_FMT_EXTENTS, /* DIR, REG, LNK: di_bmx */ - XFS_DINODE_FMT_BTREE, /* DIR, REG, LNK: di_bmbt */ - XFS_DINODE_FMT_UUID /* MNT: di_uuid */ +typedef enum xfs_dinode_fmt { + XFS_DINODE_FMT_DEV, /* xfs_dev_t */ + XFS_DINODE_FMT_LOCAL, /* bulk data */ + XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ + XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ + XFS_DINODE_FMT_UUID /* uuid_t */ } xfs_dinode_fmt_t; /* @@ -136,8 +113,8 @@ typedef enum xfs_dinode_fmt /* * Inode data & attribute fork sizes, per inode. */ -#define XFS_DFORK_Q(dip) ((dip)->di_core.di_forkoff != 0) -#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_core.di_forkoff << 3)) +#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0) +#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3)) #define XFS_DFORK_DSIZE(dip,mp) \ (XFS_DFORK_Q(dip) ? \ @@ -152,22 +129,41 @@ typedef enum xfs_dinode_fmt XFS_DFORK_DSIZE(dip, mp) : \ XFS_DFORK_ASIZE(dip, mp)) -#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) +/* + * Return pointers to the data or attribute forks. + */ +#define XFS_DFORK_DPTR(dip) \ + ((char *)(dip) + sizeof(struct xfs_dinode)) #define XFS_DFORK_APTR(dip) \ - ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip)) + (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) #define XFS_DFORK_PTR(dip,w) \ ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) + #define XFS_DFORK_FORMAT(dip,w) \ ((w) == XFS_DATA_FORK ? \ - (dip)->di_core.di_format : \ - (dip)->di_core.di_aformat) + (dip)->di_format : \ + (dip)->di_aformat) #define XFS_DFORK_NEXTENTS(dip,w) \ ((w) == XFS_DATA_FORK ? \ - be32_to_cpu((dip)->di_core.di_nextents) : \ - be16_to_cpu((dip)->di_core.di_anextents)) + be32_to_cpu((dip)->di_nextents) : \ + be16_to_cpu((dip)->di_anextents)) #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) +/* + * For block and character special files the 32bit dev_t is stored at the + * beginning of the data fork. + */ +static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip) +{ + return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip)); +} + +static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) +{ + *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev); +} + /* * Values for di_flags * There should be a one-to-one correspondence between these flags and the diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h index deecc9d238f..6ac44b550d3 100644 --- a/fs/xfs/xfs_dir2_sf.h +++ b/fs/xfs/xfs_dir2_sf.h @@ -33,13 +33,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; -/* - * Maximum size of a shortform directory. - */ -#define XFS_DIR2_SF_MAX_SIZE \ - (XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \ - (uint)sizeof(xfs_agino_t)) - /* * Inode number stored as 8 8-bit values. */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 86d463ee7b9..47e94288fcb 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -355,12 +355,12 @@ xfs_ialloc_ag_alloc( xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { int ioffset = i << args.mp->m_sb.sb_inodelog; - uint isize = sizeof(xfs_dinode_t) + sizeof(__be32); + uint isize = sizeof(struct xfs_dinode); free = XFS_MAKE_IPTR(args.mp, fbuf, i); - free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - free->di_core.di_version = version; - free->di_core.di_gen = cpu_to_be32(gen); + free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + free->di_version = version; + free->di_gen = cpu_to_be32(gen); free->di_next_unlinked = cpu_to_be32(NULLAGINO); xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 46b0526acd4..1d2912dc522 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -174,8 +174,8 @@ xfs_imap_to_bp( dip = (xfs_dinode_t *)xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); - di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && - XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); + di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && + XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { @@ -191,7 +191,7 @@ xfs_imap_to_bp( "daddr %lld #%d (magic=%x)", XFS_BUFTARG_NAME(mp->m_ddev_targp), (unsigned long long)imap->im_blkno, i, - be16_to_cpu(dip->di_core.di_magic)); + be16_to_cpu(dip->di_magic)); #endif xfs_trans_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); @@ -350,26 +350,26 @@ xfs_iformat( XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); error = 0; - if (unlikely(be32_to_cpu(dip->di_core.di_nextents) + - be16_to_cpu(dip->di_core.di_anextents) > - be64_to_cpu(dip->di_core.di_nblocks))) { + if (unlikely(be32_to_cpu(dip->di_nextents) + + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks))) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", (unsigned long long)ip->i_ino, - (int)(be32_to_cpu(dip->di_core.di_nextents) + - be16_to_cpu(dip->di_core.di_anextents)), + (int)(be32_to_cpu(dip->di_nextents) + + be16_to_cpu(dip->di_anextents)), (unsigned long long) - be64_to_cpu(dip->di_core.di_nblocks)); + be64_to_cpu(dip->di_nblocks)); XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); } - if (unlikely(dip->di_core.di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { + if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", (unsigned long long)ip->i_ino, - dip->di_core.di_forkoff); + dip->di_forkoff); XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); @@ -380,25 +380,25 @@ xfs_iformat( case S_IFCHR: case S_IFBLK: case S_IFSOCK: - if (unlikely(dip->di_core.di_format != XFS_DINODE_FMT_DEV)) { + if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); } ip->i_d.di_size = 0; ip->i_size = 0; - ip->i_df.if_u2.if_rdev = be32_to_cpu(dip->di_u.di_dev); + ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); break; case S_IFREG: case S_IFLNK: case S_IFDIR: - switch (dip->di_core.di_format) { + switch (dip->di_format) { case XFS_DINODE_FMT_LOCAL: /* * no local regular files yet */ - if (unlikely((be16_to_cpu(dip->di_core.di_mode) & S_IFMT) == S_IFREG)) { + if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt inode %Lu " "(local format for regular file).", @@ -409,7 +409,7 @@ xfs_iformat( return XFS_ERROR(EFSCORRUPTED); } - di_size = be64_to_cpu(dip->di_core.di_size); + di_size = be64_to_cpu(dip->di_size); if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt inode %Lu " @@ -451,7 +451,7 @@ xfs_iformat( ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); - switch (dip->di_core.di_aformat) { + switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); @@ -663,7 +663,7 @@ xfs_iformat_btree( void xfs_dinode_from_disk( xfs_icdinode_t *to, - xfs_dinode_core_t *from) + xfs_dinode_t *from) { to->di_magic = be16_to_cpu(from->di_magic); to->di_mode = be16_to_cpu(from->di_mode); @@ -697,7 +697,7 @@ xfs_dinode_from_disk( void xfs_dinode_to_disk( - xfs_dinode_core_t *to, + xfs_dinode_t *to, xfs_icdinode_t *from) { to->di_magic = cpu_to_be16(from->di_magic); @@ -784,9 +784,7 @@ uint xfs_dic2xflags( xfs_dinode_t *dip) { - xfs_dinode_core_t *dic = &dip->di_core; - - return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) | + return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); } @@ -905,12 +903,12 @@ xfs_iread( * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ - if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " - "dip->di_core.di_magic (0x%x) != " + "dip->di_magic (0x%x) != " "XFS_DINODE_MAGIC (0x%x)", - be16_to_cpu(dip->di_core.di_magic), + be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); #endif /* DEBUG */ error = XFS_ERROR(EINVAL); @@ -924,8 +922,8 @@ xfs_iread( * specific information. * Otherwise, just get the truly permanent information. */ - if (dip->di_core.di_mode) { - xfs_dinode_from_disk(&ip->i_d, &dip->di_core); + if (dip->di_mode) { + xfs_dinode_from_disk(&ip->i_d, dip); error = xfs_iformat(ip, dip); if (error) { #ifdef DEBUG @@ -936,10 +934,10 @@ xfs_iread( goto out_brelse; } } else { - ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); - ip->i_d.di_version = dip->di_core.di_version; - ip->i_d.di_gen = be32_to_cpu(dip->di_core.di_gen); - ip->i_d.di_flushiter = be16_to_cpu(dip->di_core.di_flushiter); + ip->i_d.di_magic = be16_to_cpu(dip->di_magic); + ip->i_d.di_version = dip->di_version; + ip->i_d.di_gen = be32_to_cpu(dip->di_gen); + ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); /* * Make sure to pull in the mode here as well in * case the inode is released without being used. @@ -2295,7 +2293,7 @@ xfs_ifree( * This is a temporary hack that would require a proper fix * in the future. */ - dip->di_core.di_mode = 0; + dip->di_mode = 0; if (delete) { xfs_ifree_cluster(ip, tp, first_ino); @@ -2909,15 +2907,16 @@ xfs_iflush_fork( case XFS_DINODE_FMT_DEV: if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); - dip->di_u.di_dev = cpu_to_be32(ip->i_df.if_u2.if_rdev); + xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); } break; case XFS_DINODE_FMT_UUID: if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { ASSERT(whichfork == XFS_DATA_FORK); - memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid, - sizeof(uuid_t)); + memcpy(XFS_DFORK_DPTR(dip), + &ip->i_df.if_u2.if_uuid, + sizeof(uuid_t)); } break; @@ -3295,11 +3294,11 @@ xfs_iflush_int( */ xfs_synchronize_atime(ip); - if (XFS_TEST_ERROR(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC, + if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", - ip->i_ino, be16_to_cpu(dip->di_core.di_magic), dip); + ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, @@ -3362,7 +3361,7 @@ xfs_iflush_int( * because if the inode is dirty at all the core must * be. */ - xfs_dinode_to_disk(&dip->di_core, &ip->i_d); + xfs_dinode_to_disk(dip, &ip->i_d); /* Wrap, we never let the log put out DI_MAX_FLUSH */ if (ip->i_d.di_flushiter == DI_MAX_FLUSH) @@ -3382,7 +3381,7 @@ xfs_iflush_int( * Convert it back. */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - dip->di_core.di_onlink = cpu_to_be16(ip->i_d.di_nlink); + dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); } else { /* * The superblock version has already been bumped, @@ -3390,12 +3389,12 @@ xfs_iflush_int( * format permanent. */ ip->i_d.di_version = XFS_DINODE_VERSION_2; - dip->di_core.di_version = XFS_DINODE_VERSION_2; + dip->di_version = XFS_DINODE_VERSION_2; ip->i_d.di_onlink = 0; - dip->di_core.di_onlink = 0; + dip->di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - memset(&(dip->di_core.di_pad[0]), 0, - sizeof(dip->di_core.di_pad)); + memset(&(dip->di_pad[0]), 0, + sizeof(dip->di_pad)); ASSERT(ip->i_d.di_projid == 0); } } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ea691c738f2..705083a8ffa 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -19,7 +19,6 @@ #define __XFS_INODE_H__ struct xfs_dinode; -struct xfs_dinode_core; struct xfs_inode; /* @@ -112,7 +111,7 @@ typedef struct xfs_ictimestamp { } xfs_ictimestamp_t; /* - * NOTE: This structure must be kept identical to struct xfs_dinode_core + * NOTE: This structure must be kept identical to struct xfs_dinode * in xfs_dinode.h except for the endianess annotations. */ typedef struct xfs_icdinode { @@ -553,8 +552,8 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, xfs_daddr_t, uint, uint); void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode_core *); -void xfs_dinode_to_disk(struct xfs_dinode_core *, + struct xfs_dinode *); +void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index aa9bf05060c..27f18c15e16 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -281,7 +281,7 @@ xfs_inode_item_format( xfs_mark_inode_dirty_sync(ip); vecp->i_addr = (xfs_caddr_t)&ip->i_d; - vecp->i_len = sizeof(xfs_dinode_core_t); + vecp->i_len = sizeof(struct xfs_icdinode); XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); vecp++; nvecs++; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 35118032a5d..b3578cdc33d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -125,13 +125,9 @@ STATIC void xfs_bulkstat_one_dinode( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t ino, /* inode number to get data for */ - xfs_dinode_t *dip, /* dinode inode pointer */ + xfs_dinode_t *dic, /* dinode inode pointer */ xfs_bstat_t *buf) /* return buffer */ { - xfs_dinode_core_t *dic; /* dinode core info pointer */ - - dic = &dip->di_core; - /* * The inode format changed when we moved the link count and * made it 32 bits long. If this is an old format inode, @@ -162,7 +158,7 @@ xfs_bulkstat_one_dinode( buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); - buf->bs_xflags = xfs_dic2xflags(dip); + buf->bs_xflags = xfs_dic2xflags(dic); buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; buf->bs_extents = be32_to_cpu(dic->di_nextents); buf->bs_gen = be32_to_cpu(dic->di_gen); @@ -173,7 +169,7 @@ xfs_bulkstat_one_dinode( switch (dic->di_format) { case XFS_DINODE_FMT_DEV: - buf->bs_rdev = be32_to_cpu(dip->di_u.di_dev); + buf->bs_rdev = xfs_dinode_get_rdev(dic); buf->bs_blksize = BLKDEV_IOSIZE; buf->bs_blocks = 0; break; @@ -287,19 +283,19 @@ xfs_bulkstat_use_dinode( * to disk yet. This is a temporary hack that would require a proper * fix in the future. */ - if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC || - !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) || - !dip->di_core.di_mode) + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || + !XFS_DINODE_GOOD_VERSION(dip->di_version) || + !dip->di_mode) return 0; if (flags & BULKSTAT_FG_QUICK) { *dipp = dip; return 1; } /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ - aformat = dip->di_core.di_aformat; + aformat = dip->di_aformat; if ((XFS_DFORK_Q(dip) == 0) || (aformat == XFS_DINODE_FMT_LOCAL) || - (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { + (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { *dipp = dip; return 1; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9abb96a7674..4099618f5fa 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2320,7 +2320,7 @@ xlog_recover_do_inode_trans( * Make sure the place we're flushing out to really looks * like an inode! */ - if (unlikely(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC)) { + if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", @@ -2343,12 +2343,12 @@ xlog_recover_do_inode_trans( } /* Skip replay when the on disk inode is newer than the log one */ - if (dicp->di_flushiter < be16_to_cpu(dip->di_core.di_flushiter)) { + if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less * than smaller numbers */ - if (be16_to_cpu(dip->di_core.di_flushiter) == DI_MAX_FLUSH && + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { /* do nothing */ } else { @@ -2408,7 +2408,7 @@ xlog_recover_do_inode_trans( error = EFSCORRUPTED; goto error; } - if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { + if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); @@ -2420,23 +2420,24 @@ xlog_recover_do_inode_trans( } /* The core is in in-core format */ - xfs_dinode_to_disk(&dip->di_core, - (xfs_icdinode_t *)item->ri_buf[1].i_addr); + xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); /* the rest is in on-disk format */ - if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) { - memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t), - item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t), - item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t)); + if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { + memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), + item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), + item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); } fields = in_f->ilf_fields; switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { case XFS_ILOG_DEV: - dip->di_u.di_dev = cpu_to_be32(in_f->ilf_u.ilfu_rdev); + xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); break; case XFS_ILOG_UUID: - dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid; + memcpy(XFS_DFORK_DPTR(dip), + &in_f->ilf_u.ilfu_uuid, + sizeof(uuid_t)); break; } @@ -2452,12 +2453,12 @@ xlog_recover_do_inode_trans( switch (fields & XFS_ILOG_DFORK) { case XFS_ILOG_DDATA: case XFS_ILOG_DEXT: - memcpy(&dip->di_u, src, len); + memcpy(XFS_DFORK_DPTR(dip), src, len); break; case XFS_ILOG_DBROOT: xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, - &dip->di_u.di_bmbt, + (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), XFS_DFORK_DSIZE(dip, mp)); break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 177976dfea0..3f999b1c038 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -575,8 +575,7 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; - mp->m_litino = sbp->sb_inodesize - - ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t)); + mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode); mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; -- cgit v1.2.3 From 51ce16d519da0bc3c548e0facef7cb3aab1ac8cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:39 +1100 Subject: [XFS] kill XFS_DINODE_VERSION_ defines These names don't add any value at all over just using the numerical values. (First sent on October 9th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_ioctl.c | 2 +- fs/xfs/xfs_dinode.h | 7 ++----- fs/xfs/xfs_ialloc.c | 4 ++-- fs/xfs/xfs_inode.c | 17 ++++++++--------- fs/xfs/xfs_inode_item.c | 7 +++---- fs/xfs/xfs_itable.c | 2 +- fs/xfs/xfs_utils.c | 6 +++--- 7 files changed, 20 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d5970599953..534b175f3a4 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1131,7 +1131,7 @@ xfs_ioctl_setattr( * the superblock version number since projids didn't * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. */ - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) + if (ip->i_d.di_version == 1) xfs_bump_ino_vers2(tp, ip); } diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 0b7ebf922af..162e8726df5 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -18,11 +18,8 @@ #ifndef __XFS_DINODE_H__ #define __XFS_DINODE_H__ -#define XFS_DINODE_VERSION_1 1 -#define XFS_DINODE_VERSION_2 2 -#define XFS_DINODE_GOOD_VERSION(v) \ - (((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2)) -#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ +#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ +#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2)) typedef struct xfs_timestamp { __be32 t_sec; /* timestamp seconds */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 47e94288fcb..16eda31fe79 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -321,9 +321,9 @@ xfs_ialloc_ag_alloc( * able to use the file system. */ if (xfs_sb_version_hasnlink(&args.mp->m_sb)) - version = XFS_DINODE_VERSION_2; + version = 2; else - version = XFS_DINODE_VERSION_1; + version = 1; /* * Seed the new inode cluster with a random generation number. This diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1d2912dc522..083395cd675 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -965,7 +965,7 @@ xfs_iread( * the new format. We don't change the version number so that we * can distinguish this from a real new format inode. */ - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + if (ip->i_d.di_version == 1) { ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; ip->i_d.di_projid = 0; @@ -1139,8 +1139,8 @@ xfs_ialloc( * here rather than here and in the flush/logging code. */ if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && - ip->i_d.di_version == XFS_DINODE_VERSION_1) { - ip->i_d.di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version == 1) { + ip->i_d.di_version = 2; /* * We've already zeroed the old link count, the projid field, * and the pad field. @@ -1150,7 +1150,7 @@ xfs_ialloc( /* * Project ids won't be stored on disk if we are using a version 1 inode. */ - if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) + if ((prid != 0) && (ip->i_d.di_version == 1)) xfs_bump_ino_vers2(tp, ip); if (pip && XFS_INHERIT_GID(pip)) { @@ -3373,9 +3373,8 @@ xfs_iflush_int( * convert back to the old inode format. If the superblock version * has been updated, then make the conversion permanent. */ - ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || - xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); + if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* * Convert it back. @@ -3388,8 +3387,8 @@ xfs_iflush_int( * so just make the conversion to the new inode * format permanent. */ - ip->i_d.di_version = XFS_DINODE_VERSION_2; - dip->di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version = 2; + dip->di_version = 2; ip->i_d.di_onlink = 0; dip->di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 27f18c15e16..c43118148e6 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -296,9 +296,8 @@ xfs_inode_item_format( * has a new version number, then we don't bother converting back. */ mp = ip->i_mount; - ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || - xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); + if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* * Convert it back. @@ -311,7 +310,7 @@ xfs_inode_item_format( * so just make the conversion to the new inode * format permanent. */ - ip->i_d.di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b3578cdc33d..2cf16f4695e 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -139,7 +139,7 @@ xfs_bulkstat_one_dinode( * the new format. We don't change the version number so that we * can distinguish this from a real new format inode. */ - if (dic->di_version == XFS_DINODE_VERSION_1) { + if (dic->di_version == 1) { buf->bs_nlink = be16_to_cpu(dic->di_onlink); buf->bs_projid = 0; } else { diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 771144932ab..fcc2285d03e 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -274,9 +274,9 @@ xfs_bump_ino_vers2( xfs_mount_t *mp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); + ASSERT(ip->i_d.di_version == 1); - ip->i_d.di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); mp = tp->t_mountp; @@ -308,7 +308,7 @@ xfs_bumplink( ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; inc_nlink(VFS_I(ip)); - if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && + if ((ip->i_d.di_version == 1) && (ip->i_d.di_nlink > XFS_MAXLINK_1)) { /* * The inode has increased its number of links beyond -- cgit v1.2.3 From 23fac50f959a87febf7ce4ae9d47525121f10c7a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:40 +1100 Subject: [XFS] split up xlog_recover_process_iunlinks Split out the body of the main loop into a separate helper to make the code readable. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_log_recover.c | 134 +++++++++++++++++++++++------------------------ 1 file changed, 66 insertions(+), 68 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4099618f5fa..841398d2421 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3147,6 +3147,70 @@ out_error: return; } +STATIC xfs_agino_t +xlog_recover_process_one_iunlink( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t agino, + int bucket) +{ + struct xfs_buf *ibp; + struct xfs_dinode *dip; + struct xfs_inode *ip; + xfs_ino_t ino; + int error; + + ino = XFS_AGINO_TO_INO(mp, agno, agino); + error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); + if (error) + goto fail; + + /* + * Get the on disk inode to find the next inode in the bucket. + */ + ASSERT(ip != NULL); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + if (error) + goto fail; + + ASSERT(dip != NULL); + ASSERT(ip->i_d.di_nlink == 0); + + /* setup for the next pass */ + agino = be32_to_cpu(dip->di_next_unlinked); + xfs_buf_relse(ibp); + + /* + * Prevent any DMAPI event from being sent when the reference on + * the inode is dropped. + */ + ip->i_d.di_dmevmask = 0; + + /* + * If this is a new inode, handle it specially. Otherwise, just + * drop our reference to the inode. If there are no other + * references, this will send the inode to xfs_inactive() which + * will truncate the file and free the inode. + */ + if (ip->i_d.di_mode == 0) + xfs_iput_new(ip, 0); + else + IRELE(ip); + return agino; + + fail: + /* + * We can't read in the inode this bucket points to, or this inode + * is messed up. Just ditch this bucket of inodes. We will lose + * some inodes and space, but at least we won't hang. + * + * Call xlog_recover_clear_agi_bucket() to perform a transaction to + * clear the inode pointer in the bucket. + */ + xlog_recover_clear_agi_bucket(mp, agno, bucket); + return NULLAGINO; +} + /* * xlog_iunlink_recover * @@ -3167,11 +3231,7 @@ xlog_recover_process_iunlinks( xfs_agnumber_t agno; xfs_agi_t *agi; xfs_buf_t *agibp; - xfs_buf_t *ibp; - xfs_dinode_t *dip; - xfs_inode_t *ip; xfs_agino_t agino; - xfs_ino_t ino; int bucket; int error; uint mp_dmevmask; @@ -3201,10 +3261,8 @@ xlog_recover_process_iunlinks( agi = XFS_BUF_TO_AGI(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { - agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - /* * Release the agi buffer so that it can * be acquired in the normal course of the @@ -3212,68 +3270,8 @@ xlog_recover_process_iunlinks( */ xfs_buf_relse(agibp); - ino = XFS_AGINO_TO_INO(mp, agno, agino); - error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); - ASSERT(error || (ip != NULL)); - - if (!error) { - /* - * Get the on disk inode to find the - * next inode in the bucket. - */ - error = xfs_itobp(mp, NULL, ip, &dip, - &ibp, 0, 0, - XFS_BUF_LOCK); - ASSERT(error || (dip != NULL)); - } - - if (!error) { - ASSERT(ip->i_d.di_nlink == 0); - - /* setup for the next pass */ - agino = be32_to_cpu( - dip->di_next_unlinked); - xfs_buf_relse(ibp); - /* - * Prevent any DMAPI event from - * being sent when the - * reference on the inode is - * dropped. - */ - ip->i_d.di_dmevmask = 0; - - /* - * If this is a new inode, handle - * it specially. Otherwise, - * just drop our reference to the - * inode. If there are no - * other references, this will - * send the inode to - * xfs_inactive() which will - * truncate the file and free - * the inode. - */ - if (ip->i_d.di_mode == 0) - xfs_iput_new(ip, 0); - else - IRELE(ip); - } else { - /* - * We can't read in the inode - * this bucket points to, or - * this inode is messed up. Just - * ditch this bucket of inodes. We - * will lose some inodes and space, - * but at least we won't hang. Call - * xlog_recover_clear_agi_bucket() - * to perform a transaction to clear - * the inode pointer in the bucket. - */ - xlog_recover_clear_agi_bucket(mp, agno, - bucket); - - agino = NULLAGINO; - } + agino = xlog_recover_process_one_iunlink(mp, + agno, agino, bucket); /* * Reacquire the agibuffer and continue around -- cgit v1.2.3 From 76d8b277f7b715f78ee3cb09ee112563639693a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:40 +1100 Subject: [XFS] stop using xfs_itobp in xfs_iread The only caller of xfs_itobp that doesn't have i_blkno setup is now the initial inode read. It needs access to the whole xfs_imap so using xfs_inotobp is not an option. Instead opencode the buffer lookup in xfs_iread and kill all the functionality for the initial map from xfs_itobp. (First sent on October 21st) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_inode.c | 80 ++++++++++++++++++++---------------------------- fs/xfs/xfs_inode.h | 4 +-- fs/xfs/xfs_log_recover.c | 2 +- 3 files changed, 37 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 083395cd675..1d5c28b144a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -262,15 +262,11 @@ xfs_inotobp( * If a non-zero error is returned, then the contents of bpp and * dipp are undefined. * - * If the inode is new and has not yet been initialized, use xfs_imap() - * to determine the size and location of the buffer to read from disk. - * If the inode has already been mapped to its buffer and read in once, - * then use the mapping information stored in the inode rather than - * calling xfs_imap(). This allows us to avoid the overhead of looking - * at the inode btree for small block file systems (see xfs_dilocate()). - * We can tell whether the inode has been mapped in before by comparing - * its disk block address to 0. Only uninitialized inodes will have - * 0 for the disk block address. + * The inode is expected to already been mapped to its buffer and read + * in once, thus we can use the mapping information stored in the inode + * rather than calling xfs_imap(). This allows us to avoid the overhead + * of looking at the inode btree for small block file systems + * (see xfs_dilocate()). */ int xfs_itobp( @@ -279,40 +275,19 @@ xfs_itobp( xfs_inode_t *ip, xfs_dinode_t **dipp, xfs_buf_t **bpp, - xfs_daddr_t bno, - uint imap_flags, uint buf_flags) { xfs_imap_t imap; xfs_buf_t *bp; int error; - if (ip->i_blkno == (xfs_daddr_t)0) { - imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, - XFS_IMAP_LOOKUP | imap_flags); - if (error) - return error; + ASSERT(ip->i_blkno != 0); - /* - * Fill in the fields in the inode that will be used to - * map the inode to its buffer from now on. - */ - ip->i_blkno = imap.im_blkno; - ip->i_len = imap.im_len; - ip->i_boffset = imap.im_boffset; - } else { - /* - * We've already mapped the inode once, so just use the - * mapping that we saved the first time. - */ - imap.im_blkno = ip->i_blkno; - imap.im_len = ip->i_len; - imap.im_boffset = ip->i_boffset; - } - ASSERT(bno == 0 || bno == imap.im_blkno); + imap.im_blkno = ip->i_blkno; + imap.im_len = ip->i_len; + imap.im_boffset = ip->i_boffset; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, 0); if (error) return error; @@ -882,6 +857,7 @@ xfs_iread( xfs_buf_t *bp; xfs_dinode_t *dip; xfs_inode_t *ip; + xfs_imap_t imap; int error; ip = xfs_inode_alloc(mp, ino); @@ -889,15 +865,27 @@ xfs_iread( return ENOMEM; /* - * Get pointer's to the on-disk inode and the buffer containing it. - * If the inode number refers to a block outside the file system - * then xfs_itobp() will return NULL. In this case we should - * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will - * know that this is a new incore inode. + * Get pointers to the on-disk inode and the buffer containing it. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); + imap.im_blkno = bno; + error = xfs_imap(mp, tp, ip->i_ino, &imap, + XFS_IMAP_LOOKUP | imap_flags); + if (error) + goto out_destroy_inode; + + /* + * Fill in the fields in the inode that will be used to + * map the inode to its buffer from now on. + */ + ip->i_blkno = imap.im_blkno; + ip->i_len = imap.im_len; + ip->i_boffset = imap.im_boffset; + ASSERT(bno == 0 || bno == imap.im_blkno); + + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); if (error) goto out_destroy_inode; + dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); /* * If we got something that isn't an inode it means someone @@ -1878,7 +1866,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) return error; @@ -1960,7 +1948,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2022,7 +2010,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2277,7 +2265,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) return error; @@ -3191,7 +3179,7 @@ xfs_iflush( /* * Get the buffer containing the on-disk inode. */ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, + error = xfs_itobp(mp, NULL, ip, &dip, &bp, noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); if (error || !bp) { xfs_ifunlock(ip); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 705083a8ffa..ec8b539439d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -157,7 +157,7 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_inotobp, xfs_itobp(), xfs_imap() and xfs_dilocate(). + * Flags for xfs_inotobp, xfs_imap() and xfs_dilocate(). */ #define XFS_IMAP_LOOKUP 0x1 #define XFS_IMAP_BULKSTAT 0x2 @@ -550,7 +550,7 @@ int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, struct xfs_buf **, int *, uint); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, - struct xfs_buf **, xfs_daddr_t, uint, uint); + struct xfs_buf **, uint); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode *); void xfs_dinode_to_disk(struct xfs_dinode *, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 841398d2421..48bdfa4dc29 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3169,7 +3169,7 @@ xlog_recover_process_one_iunlink( * Get the on disk inode to find the next inode in the bucket. */ ASSERT(ip != NULL); - error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) goto fail; -- cgit v1.2.3 From a1941895034cda2bffa23ba845607c82138ccf52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:40 +1100 Subject: [XFS] remove dead code for old inode item recovery We have removed the support for old-style inode items a while ago and xlog_recover_do_inode_trans is now only called for XFS_LI_INODE items. That means we can remove the call to xfs_imap there and with it the XFS_IMAP_LOOKUP that is set by all other callers. We can also mark xfs_imap static now. (First sent on October 21st) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_ialloc.c | 3 +-- fs/xfs/xfs_inode.c | 5 ++--- fs/xfs/xfs_inode.h | 3 +-- fs/xfs/xfs_log_recover.c | 32 ++++++-------------------------- 4 files changed, 10 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 16eda31fe79..0ce56d6a492 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1262,8 +1262,7 @@ xfs_dilocate( #endif /* DEBUG */ return XFS_ERROR(EINVAL); } - if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) || - !(flags & XFS_IMAP_LOOKUP)) { + if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp))) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); *bno = XFS_AGB_TO_FSB(mp, agno, agbno); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1d5c28b144a..663ff9e517f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -237,7 +237,7 @@ xfs_inotobp( int error; imap.im_blkno = 0; - error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP); + error = xfs_imap(mp, tp, ino, &imap, imap_flags); if (error) return error; @@ -868,8 +868,7 @@ xfs_iread( * Get pointers to the on-disk inode and the buffer containing it. */ imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, - XFS_IMAP_LOOKUP | imap_flags); + error = xfs_imap(mp, tp, ip->i_ino, &imap, imap_flags); if (error) goto out_destroy_inode; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ec8b539439d..db1f6884072 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -159,8 +159,7 @@ typedef struct xfs_icdinode { /* * Flags for xfs_inotobp, xfs_imap() and xfs_dilocate(). */ -#define XFS_IMAP_LOOKUP 0x1 -#define XFS_IMAP_BULKSTAT 0x2 +#define XFS_IMAP_BULKSTAT 0x1 /* * Fork handling. diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 48bdfa4dc29..bf8573b5a7d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2245,7 +2245,6 @@ xlog_recover_do_inode_trans( xfs_inode_log_format_t *in_f; xfs_mount_t *mp; xfs_buf_t *bp; - xfs_imap_t imap; xfs_dinode_t *dip; xfs_ino_t ino; int len; @@ -2273,48 +2272,29 @@ xlog_recover_do_inode_trans( } ino = in_f->ilf_ino; mp = log->l_mp; - if (ITEM_TYPE(item) == XFS_LI_INODE) { - imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno; - imap.im_len = in_f->ilf_len; - imap.im_boffset = in_f->ilf_boffset; - } else { - /* - * It's an old inode format record. We don't know where - * its cluster is located on disk, and we can't allow - * xfs_imap() to figure it out because the inode btrees - * are not ready to be used. Therefore do not pass the - * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give - * us only the single block in which the inode lives - * rather than its cluster, so we must make sure to - * invalidate the buffer when we write it out below. - */ - imap.im_blkno = 0; - error = xfs_imap(log->l_mp, NULL, ino, &imap, 0); - if (error) - goto error; - } /* * Inode buffers can be freed, look out for it, * and do not replay the inode. */ - if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { + if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, + in_f->ilf_len, 0)) { error = 0; goto error; } - bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, - XFS_BUF_LOCK); + bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, + in_f->ilf_len, XFS_BUF_LOCK); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, - bp, imap.im_blkno); + bp, in_f->ilf_blkno); error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); goto error; } error = 0; ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); - dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); /* * Make sure the place we're flushing out to really looks -- cgit v1.2.3 From 94e1b69d1abd108d306e926c3012ec89e481c0da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:41 +1100 Subject: [XFS] merge xfs_imap into xfs_dilocate xfs_imap is the only caller of xfs_dilocate and doesn't add any significant value. Merge the two functions and document the various cases we have for inode cluster lookup in the new xfs_imap. Also remove the unused im_agblkno and im_ioffset fields from struct xfs_imap while we're at it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_ialloc.c | 129 +++++++++++++++++++++++++++++++++------------------- fs/xfs/xfs_ialloc.h | 10 ++-- fs/xfs/xfs_imap.h | 7 --- fs/xfs/xfs_inode.c | 60 +----------------------- fs/xfs/xfs_inode.h | 2 +- 5 files changed, 88 insertions(+), 120 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0ce56d6a492..348ac30174c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -40,6 +40,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" +#include "xfs_imap.h" /* @@ -1196,36 +1197,28 @@ error0: } /* - * Return the location of the inode in bno/off, for mapping it into a buffer. + * Return the location of the inode in imap, for mapping it into a buffer. */ -/*ARGSUSED*/ int -xfs_dilocate( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ +xfs_imap( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t ino, /* inode to locate */ - xfs_fsblock_t *bno, /* output: block containing inode */ - int *len, /* output: num blocks in inode cluster */ - int *off, /* output: index in block of inode */ - uint flags) /* flags concerning inode lookup */ + struct xfs_imap *imap, /* location map structure */ + uint flags) /* flags for inode btree lookup */ { xfs_agblock_t agbno; /* block number of inode in the alloc group */ - xfs_buf_t *agbp; /* agi buffer */ xfs_agino_t agino; /* inode number within alloc group */ xfs_agnumber_t agno; /* allocation group number */ int blks_per_cluster; /* num blocks per inode cluster */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */ - xfs_agino_t chunk_agino; /* first agino in inode chunk */ - __int32_t chunk_cnt; /* count of free inodes in chunk */ - xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ xfs_agblock_t cluster_agbno; /* first block in inode cluster */ - xfs_btree_cur_t *cur; /* inode btree cursor */ int error; /* error code */ - int i; /* temp state */ int offset; /* index of inode in its buffer */ int offset_agbno; /* blks from chunk start to inode */ ASSERT(ino != NULLFSINO); + /* * Split up the inode number into its parts. */ @@ -1240,20 +1233,20 @@ xfs_dilocate( return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_dilocate: agno (%d) >= " + "xfs_imap: agno (%d) >= " "mp->m_sb.sb_agcount (%d)", agno, mp->m_sb.sb_agcount); } if (agbno >= mp->m_sb.sb_agblocks) { xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_dilocate: agbno (0x%llx) >= " + "xfs_imap: agbno (0x%llx) >= " "mp->m_sb.sb_agblocks (0x%lx)", (unsigned long long) agbno, (unsigned long) mp->m_sb.sb_agblocks); } if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_dilocate: ino (0x%llx) != " + "xfs_imap: ino (0x%llx) != " "XFS_AGINO_TO_INO(mp, agno, agino) " "(0x%llx)", ino, XFS_AGINO_TO_INO(mp, agno, agino)); @@ -1262,63 +1255,89 @@ xfs_dilocate( #endif /* DEBUG */ return XFS_ERROR(EINVAL); } - if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp))) { + + /* + * If the inode cluster size is the same as the blocksize or + * smaller we get to the buffer by simple arithmetics. + */ + if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - *bno = XFS_AGB_TO_FSB(mp, agno, agbno); - *off = offset; - *len = 1; + + imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); + imap->im_len = XFS_FSB_TO_BB(mp, 1); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); return 0; } + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; - if (*bno != NULLFSBLOCK) { + + /* + * If we get a block number passed from bulkstat we can use it to + * find the buffer easily. + */ + if (imap->im_blkno) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno); - *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + - offset; - *len = blks_per_cluster; + + cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno); + offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; + + imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); return 0; } + + /* + * If the inode chunks are aligned then use simple maths to + * find the location. Otherwise we have to do a btree + * lookup to find the location. + */ if (mp->m_inoalign_mask) { offset_agbno = agbno & mp->m_inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { + xfs_btree_cur_t *cur; /* inode btree cursor */ + xfs_agino_t chunk_agino; /* first agino in inode chunk */ + __int32_t chunk_cnt; /* count of free inodes in chunk */ + xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ + xfs_buf_t *agbp; /* agi buffer */ + int i; /* temp state */ + down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); up_read(&mp->m_peraglock); if (error) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " "error %d, agno %d", error, agno); -#endif /* DEBUG */ return error; } + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_lookup_le() failed"); -#endif /* DEBUG */ goto error0; } - if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, - &chunk_free, &i))) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + + error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, + &chunk_free, &i); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ goto error0; } if (i == 0) { #ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_get_rec() failed"); #endif /* DEBUG */ error = XFS_ERROR(EINVAL); } + error0: xfs_trans_brelse(tp, agbp); xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); if (error) @@ -1326,19 +1345,35 @@ xfs_dilocate( chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); offset_agbno = agbno - chunk_agbno; } + ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + ((offset_agbno / blks_per_cluster) * blks_per_cluster); offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); - *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno); - *off = offset; - *len = blks_per_cluster; + + imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); + imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); + + /* + * If the inode number maps to a block outside the bounds + * of the file system then return NULL rather than calling + * read_buf and panicing when we get an error from the + * driver. + */ + if ((imap->im_blkno + imap->im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " + " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", + (unsigned long long) imap->im_blkno, + (unsigned long long) imap->im_len, + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); + return XFS_ERROR(EINVAL); + } + return 0; -error0: - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; } /* diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index ccf554a6e0a..50f558a4e0a 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -20,6 +20,7 @@ struct xfs_buf; struct xfs_dinode; +struct xfs_imap; struct xfs_mount; struct xfs_trans; @@ -104,17 +105,14 @@ xfs_difree( xfs_ino_t *first_ino); /* first inode in deleted cluster */ /* - * Return the location of the inode in bno/len/off, - * for mapping it into a buffer. + * Return the location of the inode in imap, for mapping it into a buffer. */ int -xfs_dilocate( +xfs_imap( struct xfs_mount *mp, /* file system mount structure */ struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t ino, /* inode to locate */ - xfs_fsblock_t *bno, /* output: block containing inode */ - int *len, /* output: num blocks in cluster*/ - int *off, /* output: index in block of inode */ + struct xfs_imap *imap, /* location map structure */ uint flags); /* flags for inode btree lookup */ /* diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h index f9ce62890ea..08690005739 100644 --- a/fs/xfs/xfs_imap.h +++ b/fs/xfs/xfs_imap.h @@ -25,14 +25,7 @@ typedef struct xfs_imap { xfs_daddr_t im_blkno; /* starting BB of inode chunk */ uint im_len; /* length in BBs of inode chunk */ - xfs_agblock_t im_agblkno; /* logical block of inode chunk in ag */ - ushort im_ioffset; /* inode offset in block in "inodes" */ ushort im_boffset; /* inode offset in block in bytes */ } xfs_imap_t; -struct xfs_mount; -struct xfs_trans; -int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_imap_t *, uint); - #endif /* __XFS_IMAP_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 663ff9e517f..bf528b725ae 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -266,7 +266,7 @@ xfs_inotobp( * in once, thus we can use the mapping information stored in the inode * rather than calling xfs_imap(). This allows us to avoid the overhead * of looking at the inode btree for small block file systems - * (see xfs_dilocate()). + * (see xfs_imap()). */ int xfs_itobp( @@ -2508,64 +2508,6 @@ xfs_idata_realloc( ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); } - - - -/* - * Map inode to disk block and offset. - * - * mp -- the mount point structure for the current file system - * tp -- the current transaction - * ino -- the inode number of the inode to be located - * imap -- this structure is filled in with the information necessary - * to retrieve the given inode from disk - * flags -- flags to pass to xfs_dilocate indicating whether or not - * lookups in the inode btree were OK or not - */ -int -xfs_imap( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - xfs_imap_t *imap, - uint flags) -{ - xfs_fsblock_t fsbno; - int len; - int off; - int error; - - fsbno = imap->im_blkno ? - XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; - error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); - if (error) - return error; - - imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); - imap->im_len = XFS_FSB_TO_BB(mp, len); - imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); - imap->im_ioffset = (ushort)off; - imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); - - /* - * If the inode number maps to a block outside the bounds - * of the file system then return NULL rather than calling - * read_buf and panicing when we get an error from the - * driver. - */ - if ((imap->im_blkno + imap->im_len) > - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " - " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", - (unsigned long long) imap->im_blkno, - (unsigned long long) imap->im_len, - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - return EINVAL; - } - return 0; -} - void xfs_idestroy_fork( xfs_inode_t *ip, diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index db1f6884072..0a9ad1c56a8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -157,7 +157,7 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_inotobp, xfs_imap() and xfs_dilocate(). + * Flags for xfs_inotobp and xfs_imap(). */ #define XFS_IMAP_BULKSTAT 0x1 -- cgit v1.2.3 From 92bfc6e7c4eabbbd15e7d6d49123b296d05dcfd1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:41 +1100 Subject: [XFS] embededd struct xfs_imap into xfs_inode Most uses of struct xfs_imap are to map and inode to a buffer. To avoid copying around the inode location information we should just embedd a strcut xfs_imap into the xfs_inode. To make sure it doesn't bloat an inode the im_len is changed to a ushort, which is fine as that's what the users exepect anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_ialloc.c | 1 - fs/xfs/xfs_imap.h | 31 ------------------------------ fs/xfs/xfs_inode.c | 49 ++++++++++++++++++------------------------------ fs/xfs/xfs_inode.h | 14 +++++++++++--- fs/xfs/xfs_inode_item.c | 6 +++--- fs/xfs/xfs_itable.c | 2 +- fs/xfs/xfs_log_recover.c | 1 - 7 files changed, 33 insertions(+), 71 deletions(-) delete mode 100644 fs/xfs/xfs_imap.h (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 348ac30174c..4f4557262d9 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -40,7 +40,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_imap.h" /* diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h deleted file mode 100644 index 08690005739..00000000000 --- a/fs/xfs/xfs_imap.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IMAP_H__ -#define __XFS_IMAP_H__ - -/* - * This is the structure passed to xfs_imap() to map - * an inode number to its on disk location. - */ -typedef struct xfs_imap { - xfs_daddr_t im_blkno; /* starting BB of inode chunk */ - uint im_len; /* length in BBs of inode chunk */ - ushort im_boffset; /* inode offset in block in bytes */ -} xfs_imap_t; - -#endif /* __XFS_IMAP_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bf528b725ae..72dc7a87a14 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -23,7 +23,6 @@ #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_imap.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_sb.h" @@ -134,7 +133,7 @@ STATIC int xfs_imap_to_bp( xfs_mount_t *mp, xfs_trans_t *tp, - xfs_imap_t *imap, + struct xfs_imap *imap, xfs_buf_t **bpp, uint buf_flags, uint imap_flags) @@ -232,7 +231,7 @@ xfs_inotobp( int *offset, uint imap_flags) { - xfs_imap_t imap; + struct xfs_imap imap; xfs_buf_t *bp; int error; @@ -277,17 +276,12 @@ xfs_itobp( xfs_buf_t **bpp, uint buf_flags) { - xfs_imap_t imap; xfs_buf_t *bp; int error; - ASSERT(ip->i_blkno != 0); + ASSERT(ip->i_imap.im_blkno != 0); - imap.im_blkno = ip->i_blkno; - imap.im_len = ip->i_len; - imap.im_boffset = ip->i_boffset; - - error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, 0); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); if (error) return error; @@ -298,7 +292,7 @@ xfs_itobp( return EAGAIN; } - *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); *bpp = bp; return 0; } @@ -799,9 +793,7 @@ xfs_inode_alloc( /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; - ip->i_blkno = 0; - ip->i_len = 0; - ip->i_boffset =0; + memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); ip->i_afp = NULL; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; @@ -857,7 +849,6 @@ xfs_iread( xfs_buf_t *bp; xfs_dinode_t *dip; xfs_inode_t *ip; - xfs_imap_t imap; int error; ip = xfs_inode_alloc(mp, ino); @@ -865,26 +856,22 @@ xfs_iread( return ENOMEM; /* - * Get pointers to the on-disk inode and the buffer containing it. + * Fill in the location information in the in-core inode. */ - imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, imap_flags); + ip->i_imap.im_blkno = bno; + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, imap_flags); if (error) goto out_destroy_inode; + ASSERT(bno == 0 || bno == ip->i_imap.im_blkno); /* - * Fill in the fields in the inode that will be used to - * map the inode to its buffer from now on. + * Get pointers to the on-disk inode and the buffer containing it. */ - ip->i_blkno = imap.im_blkno; - ip->i_len = imap.im_len; - ip->i_boffset = imap.im_boffset; - ASSERT(bno == 0 || bno == imap.im_blkno); - - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, + XFS_BUF_LOCK, imap_flags); if (error) goto out_destroy_inode; - dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* * If we got something that isn't an inode it means someone @@ -1872,7 +1859,7 @@ xfs_iunlink( ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); /* both on-disk, don't endian flip twice */ dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; - offset = ip->i_boffset + + offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, @@ -1958,7 +1945,7 @@ xfs_iunlink_remove( ASSERT(next_agino != 0); if (next_agino != NULLAGINO) { dip->di_next_unlinked = cpu_to_be32(NULLAGINO); - offset = ip->i_boffset + + offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, @@ -2021,7 +2008,7 @@ xfs_iunlink_remove( ASSERT(next_agino != agino); if (next_agino != NULLAGINO) { dip->di_next_unlinked = cpu_to_be32(NULLAGINO); - offset = ip->i_boffset + + offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, @@ -3201,7 +3188,7 @@ xfs_iflush_int( } /* set *dip = inode's place in the buffer */ - dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* * Clear i_update_core before copying out the data. diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0a9ad1c56a8..d5c3aa1b18e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -82,6 +82,16 @@ typedef struct xfs_ifork { } if_u2; } xfs_ifork_t; +/* + * Inode location information. Stored in the inode and passed to + * xfs_imap_to_bp() to get a buffer and dinode for a given inode. + */ +struct xfs_imap { + xfs_daddr_t im_blkno; /* starting BB of inode chunk */ + ushort im_len; /* length in BBs of inode chunk */ + ushort im_boffset; /* inode offset in block in bytes */ +}; + /* * This is the xfs in-core inode structure. * Most of the on-disk inode is embedded in the i_d field. @@ -238,9 +248,7 @@ typedef struct xfs_inode { /* Inode location stuff */ xfs_ino_t i_ino; /* inode number (agno/agino)*/ - xfs_daddr_t i_blkno; /* blkno of inode buffer */ - ushort i_len; /* len of inode buffer */ - ushort i_boffset; /* off of inode in buffer */ + struct xfs_imap i_imap; /* location for xfs_imap() */ /* Extent information. */ xfs_ifork_t *i_afp; /* attribute fork pointer */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c43118148e6..977c4aec587 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -942,9 +942,9 @@ xfs_inode_item_init( iip->ili_format.ilf_type = XFS_LI_INODE; iip->ili_format.ilf_ino = ip->i_ino; - iip->ili_format.ilf_blkno = ip->i_blkno; - iip->ili_format.ilf_len = ip->i_len; - iip->ili_format.ilf_boffset = ip->i_boffset; + iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; + iip->ili_format.ilf_len = ip->i_imap.im_len; + iip->ili_format.ilf_boffset = ip->i_imap.im_boffset; } /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2cf16f4695e..4315ce642b4 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -69,7 +69,7 @@ xfs_bulkstat_one_iget( } ASSERT(ip != NULL); - ASSERT(ip->i_blkno != (xfs_daddr_t)0); + ASSERT(ip->i_imap.im_blkno != 0); dic = &ip->i_d; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index bf8573b5a7d..ce6e907bec6 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -36,7 +36,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_imap.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_log_priv.h" -- cgit v1.2.3 From b48d8d64377f39913663a06f4757f3b8c6fc6d87 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:41 +1100 Subject: [XFS] kill the XFS_IMAP_BULKSTAT flag Just pass down the XFS_IGET_* flags all the way down to xfs_imap instead of translating them mid-way. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_ialloc.c | 2 +- fs/xfs/xfs_iget.c | 3 +-- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_inode.h | 5 ----- fs/xfs/xfs_itable.c | 2 +- 5 files changed, 5 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 4f4557262d9..e6ebbaeb4dc 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1228,7 +1228,7 @@ xfs_imap( ino != XFS_AGINO_TO_INO(mp, agno, agino)) { #ifdef DEBUG /* no diagnostics for bulkstat, ino comes from userspace */ - if (flags & XFS_IMAP_BULKSTAT) + if (flags & XFS_IGET_BULKSTAT) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index bf4dc5eb4cf..d60522a73a1 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -159,8 +159,7 @@ xfs_iget_cache_miss( * Read the disk inode attributes into a new inode structure and get * a new vnode for it. This should also initialize i_ino and i_mount. */ - error = xfs_iread(mp, tp, ino, &ip, bno, - (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); + error = xfs_iread(mp, tp, ino, &ip, bno, flags); if (error) return error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 72dc7a87a14..4290760473f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -136,7 +136,7 @@ xfs_imap_to_bp( struct xfs_imap *imap, xfs_buf_t **bpp, uint buf_flags, - uint imap_flags) + uint iget_flags) { int error; int i; @@ -178,7 +178,7 @@ xfs_imap_to_bp( if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { - if (imap_flags & XFS_IMAP_BULKSTAT) { + if (iget_flags & XFS_IGET_BULKSTAT) { xfs_trans_brelse(tp, bp); return XFS_ERROR(EINVAL); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d5c3aa1b18e..49e609c39d7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -166,11 +166,6 @@ typedef struct xfs_icdinode { #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ -/* - * Flags for xfs_inotobp and xfs_imap(). - */ -#define XFS_IMAP_BULKSTAT 0x1 - /* * Fork handling. */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 4315ce642b4..d4c0de86012 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -595,7 +595,7 @@ xfs_bulkstat( error = xfs_inotobp(mp, NULL, ino, &dip, &bp, &offset, - XFS_IMAP_BULKSTAT); + XFS_IGET_BULKSTAT); if (!error) clustidx = offset / mp->m_sb.sb_inodesize; -- cgit v1.2.3 From 24f211bad09a31f19dda0c3faffe0244f4f235f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:42 +1100 Subject: [XFS] move inode allocation out xfs_iread Allocate the inode in xfs_iget_cache_miss and pass it into xfs_iread. This simplifies the error handling and allows xfs_iread to be shared with userspace which already uses these semantics. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_iget.c | 88 +++++++++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_inode.c | 103 ++++------------------------------------------------- fs/xfs/xfs_inode.h | 4 +-- 3 files changed, 91 insertions(+), 104 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d60522a73a1..27ec2417b85 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -40,6 +40,82 @@ #include "xfs_utils.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_btree_trace.h" +#include "xfs_dir2_trace.h" + + +/* + * Allocate and initialise an xfs_inode. + */ +STATIC struct xfs_inode * +xfs_inode_alloc( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + + /* + * if this didn't occur in transactions, we could use + * KM_MAYFAIL and return NULL here on ENOMEM. Set the + * code up to do this anyway. + */ + ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); + if (!ip) + return NULL; + + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(completion_done(&ip->i_flush)); + + /* + * initialise the VFS inode here to get failures + * out of the way early. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + + /* initialise the xfs inode */ + ip->i_ino = ino; + ip->i_mount = mp; + memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); + ip->i_afp = NULL; + memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); + ip->i_flags = 0; + ip->i_update_core = 0; + ip->i_update_size = 0; + ip->i_delayed_blks = 0; + memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); + ip->i_size = 0; + ip->i_new_size = 0; + + /* + * Initialize inode's trace buffers. + */ +#ifdef XFS_INODE_TRACE + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BMAP_TRACE + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BTREE_TRACE + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_RW_TRACE + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_ILOCK_TRACE + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_DIR2_TRACE + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); +#endif + + return ip; +} /* * Check the validity of the inode we just found it the cache @@ -155,13 +231,13 @@ xfs_iget_cache_miss( unsigned long first_index, mask; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); - /* - * Read the disk inode attributes into a new inode structure and get - * a new vnode for it. This should also initialize i_ino and i_mount. - */ - error = xfs_iread(mp, tp, ino, &ip, bno, flags); + ip = xfs_inode_alloc(mp, ino); + if (!ip) + return ENOMEM; + + error = xfs_iread(mp, tp, ip, bno, flags); if (error) - return error; + goto out_destroy; xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4290760473f..872191b4678 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -758,119 +758,36 @@ xfs_dic2xflags( } /* - * Allocate and initialise an xfs_inode. - */ -STATIC struct xfs_inode * -xfs_inode_alloc( - struct xfs_mount *mp, - xfs_ino_t ino) -{ - struct xfs_inode *ip; - - /* - * if this didn't occur in transactions, we could use - * KM_MAYFAIL and return NULL here on ENOMEM. Set the - * code up to do this anyway. - */ - ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); - if (!ip) - return NULL; - - ASSERT(atomic_read(&ip->i_iocount) == 0); - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); - - /* - * initialise the VFS inode here to get failures - * out of the way early. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_zone_free(xfs_inode_zone, ip); - return NULL; - } - - /* initialise the xfs inode */ - ip->i_ino = ino; - ip->i_mount = mp; - memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); - ip->i_afp = NULL; - memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); - ip->i_flags = 0; - ip->i_update_core = 0; - ip->i_update_size = 0; - ip->i_delayed_blks = 0; - memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); - ip->i_size = 0; - ip->i_new_size = 0; - - /* - * Initialize inode's trace buffers. - */ -#ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BTREE_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); -#endif - - return ip; -} - -/* - * Given a mount structure and an inode number, return a pointer - * to a newly allocated in-core inode corresponding to the given - * inode number. - * - * Initialize the inode's attributes and extent pointers if it - * already has them (it will not if the inode has no links). + * Read the disk inode attributes into the in-core inode structure. */ int xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, - xfs_ino_t ino, - xfs_inode_t **ipp, + xfs_inode_t *ip, xfs_daddr_t bno, - uint imap_flags) + uint iget_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; - xfs_inode_t *ip; int error; - ip = xfs_inode_alloc(mp, ino); - if (!ip) - return ENOMEM; - /* * Fill in the location information in the in-core inode. */ ip->i_imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, imap_flags); + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) - goto out_destroy_inode; + return error; ASSERT(bno == 0 || bno == ip->i_imap.im_blkno); /* * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, - XFS_BUF_LOCK, imap_flags); + XFS_BUF_LOCK, iget_flags); if (error) - goto out_destroy_inode; + return error; dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* @@ -968,14 +885,8 @@ xfs_iread( * to worry about the inode being changed just because we released * the buffer. */ - xfs_trans_brelse(tp, bp); - *ipp = ip; - return 0; - out_brelse: xfs_trans_brelse(tp, bp); - out_destroy_inode: - xfs_destroy_inode(ip); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 49e609c39d7..ae5800e7d39 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -516,8 +516,8 @@ void xfs_ireclaim(xfs_inode_t *); /* * xfs_inode.c prototypes. */ -int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_inode_t **, xfs_daddr_t, uint); +int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, xfs_daddr_t, uint); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); -- cgit v1.2.3 From 0e446673a15a4e9c336b67c1a638eb12c21d0993 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:42 +1100 Subject: [XFS] fix error handling in xlog_recover_process_one_iunlink If we fail after xfs_iget we have to drop the reference count, spotted by Dave Chinner. Also remove some useless asserts and stop trying to deal with di_mode == 0 inodes because never gets those without passing the IGET_CREATE flag to xfs_iget. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Niv Sardi --- fs/xfs/xfs_log_recover.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ce6e907bec6..51412cced01 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3147,13 +3147,12 @@ xlog_recover_process_one_iunlink( /* * Get the on disk inode to find the next inode in the bucket. */ - ASSERT(ip != NULL); error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) - goto fail; + goto fail_iput; - ASSERT(dip != NULL); ASSERT(ip->i_d.di_nlink == 0); + ASSERT(ip->i_d.di_mode != 0); /* setup for the next pass */ agino = be32_to_cpu(dip->di_next_unlinked); @@ -3165,18 +3164,11 @@ xlog_recover_process_one_iunlink( */ ip->i_d.di_dmevmask = 0; - /* - * If this is a new inode, handle it specially. Otherwise, just - * drop our reference to the inode. If there are no other - * references, this will send the inode to xfs_inactive() which - * will truncate the file and free the inode. - */ - if (ip->i_d.di_mode == 0) - xfs_iput_new(ip, 0); - else - IRELE(ip); + IRELE(ip); return agino; + fail_iput: + IRELE(ip); fail: /* * We can't read in the inode this bucket points to, or this inode -- cgit v1.2.3 From 743bb4650da9e2595d6cedd01c680b5b9398c74a Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:06 -0600 Subject: [XFS] Move copy_from_user calls out of ioctl helpers into ioctl switch. Moving the copy_from_user out of some of the ioctl helpers will make it easier for the compat ioctl switch to copy in the right struct, then just pass to the underlying helper. Also, move common access checks into the helpers themselves, and out of the native ioctl switch code, to reduce code duplication between native & compat ioctl callers. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl.c | 109 +++++++++++++++++++++---------------------- fs/xfs/xfs_dfrag.c | 8 +--- fs/xfs/xfs_dfrag.h | 2 +- fs/xfs/xfs_fsops.c | 6 +++ fs/xfs/xfs_rtalloc.c | 2 + 5 files changed, 63 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 534b175f3a4..03c55b8fa37 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -71,23 +71,19 @@ STATIC int xfs_find_handle( unsigned int cmd, - void __user *arg) + xfs_fsop_handlereq_t *hreq) { int hsize; xfs_handle_t handle; - xfs_fsop_handlereq_t hreq; struct inode *inode; - if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); - memset((char *)&handle, 0, sizeof(handle)); switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { struct path path; - int error = user_lpath((const char __user *)hreq.path, &path); + int error = user_lpath((const char __user *)hreq->path, &path); if (error) return error; @@ -101,7 +97,7 @@ xfs_find_handle( case XFS_IOC_FD_TO_HANDLE: { struct file *file; - file = fget(hreq.fd); + file = fget(hreq->fd); if (!file) return -EBADF; @@ -158,8 +154,8 @@ xfs_find_handle( } /* now copy our handle into the user buffer & write out the size */ - if (copy_to_user(hreq.ohandle, &handle, hsize) || - copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) { iput(inode); return -XFS_ERROR(EFAULT); } @@ -252,7 +248,7 @@ xfs_vget_fsop_handlereq( STATIC int xfs_open_by_handle( xfs_mount_t *mp, - void __user *arg, + xfs_fsop_handlereq_t *hreq, struct file *parfilp, struct inode *parinode) { @@ -262,14 +258,11 @@ xfs_open_by_handle( struct file *filp; struct inode *inode; struct dentry *dentry; - xfs_fsop_handlereq_t hreq; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); if (error) return -error; @@ -280,10 +273,10 @@ xfs_open_by_handle( } #if BITS_PER_LONG != 32 - hreq.oflags |= O_LARGEFILE; + hreq->oflags |= O_LARGEFILE; #endif /* Put open permission in namei format. */ - permflag = hreq.oflags; + permflag = hreq->oflags; if ((permflag+1) & O_ACCMODE) permflag++; if (permflag & O_TRUNC) @@ -321,7 +314,7 @@ xfs_open_by_handle( mntget(parfilp->f_path.mnt); /* Create file pointer. */ - filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags); + filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags); if (IS_ERR(filp)) { put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); @@ -365,21 +358,18 @@ do_readlink( STATIC int xfs_readlink_by_handle( xfs_mount_t *mp, - void __user *arg, + xfs_fsop_handlereq_t *hreq, struct inode *parinode) { struct inode *inode; - xfs_fsop_handlereq_t hreq; __u32 olen; void *link; int error; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); if (error) return -error; @@ -389,7 +379,7 @@ xfs_readlink_by_handle( goto out_iput; } - if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { error = -XFS_ERROR(EFAULT); goto out_iput; } @@ -401,7 +391,7 @@ xfs_readlink_by_handle( error = -xfs_readlink(XFS_I(inode), link); if (error) goto out_kfree; - error = do_readlink(hreq.ohandle, olen, link); + error = do_readlink(hreq->ohandle, olen, link); if (error) goto out_kfree; @@ -668,12 +658,19 @@ xfs_ioc_space( struct file *filp, int ioflags, unsigned int cmd, - void __user *arg) + xfs_flock64_t *bf) { - xfs_flock64_t bf; int attr_flags = 0; int error; + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); @@ -683,15 +680,12 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); - if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= XFS_ATTR_NONBLOCK; if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; - error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags); + error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); return -error; } @@ -1356,17 +1350,13 @@ xfs_ioctl( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - /* - * Only allow the sys admin to reserve space unless - * unwritten extents are enabled. - */ - if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); + case XFS_IOC_UNRESVSP64: { + xfs_flock64_t bf; + if (copy_from_user(&bf, arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = @@ -1426,18 +1416,30 @@ xfs_ioctl( case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_FSHANDLE: - return xfs_find_handle(cmd, arg); + case XFS_IOC_PATH_TO_FSHANDLE: { + xfs_fsop_handlereq_t hreq; - case XFS_IOC_OPEN_BY_HANDLE: - return xfs_open_by_handle(mp, arg, filp, inode); + if (copy_from_user(&hreq, arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(mp, &hreq, filp, inode); + } case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(mp, arg, inode); - case XFS_IOC_READLINK_BY_HANDLE: - return xfs_readlink_by_handle(mp, arg, inode); + case XFS_IOC_READLINK_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(mp, &hreq, inode); + } case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(mp, arg, inode); @@ -1445,7 +1447,11 @@ xfs_ioctl( return xfs_attrmulti_by_handle(mp, arg, filp, inode); case XFS_IOC_SWAPEXT: { - error = xfs_swapext((struct xfs_swapext __user *)arg); + struct xfs_swapext sxp; + + if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); return -error; } @@ -1501,9 +1507,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1514,9 +1517,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1527,9 +1527,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 75b0cd4da0e..b4c1ee71349 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -49,9 +49,8 @@ */ int xfs_swapext( - xfs_swapext_t __user *sxu) + xfs_swapext_t *sxp) { - xfs_swapext_t *sxp; xfs_inode_t *ip, *tip; struct file *file, *target_file; int error = 0; @@ -62,11 +61,6 @@ xfs_swapext( goto out; } - if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) { - error = XFS_ERROR(EFAULT); - goto out_free_sxp; - } - /* Pull information for the target fd */ file = fget((int)sxp->sx_fdtarget); if (!file) { diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h index da178205be6..4f55a630655 100644 --- a/fs/xfs/xfs_dfrag.h +++ b/fs/xfs/xfs_dfrag.h @@ -46,7 +46,7 @@ typedef struct xfs_swapext /* * Syscall interface for xfs_swapext */ -int xfs_swapext(struct xfs_swapext __user *sx); +int xfs_swapext(struct xfs_swapext *sx); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, struct xfs_swapext *sxp); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f1d0585041b..852b6d32e8d 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -435,6 +435,9 @@ xfs_growfs_data( xfs_growfs_data_t *in) { int error; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); if (!mutex_trylock(&mp->m_growlock)) return XFS_ERROR(EWOULDBLOCK); error = xfs_growfs_data_private(mp, in); @@ -448,6 +451,9 @@ xfs_growfs_log( xfs_growfs_log_t *in) { int error; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); if (!mutex_trylock(&mp->m_growlock)) return XFS_ERROR(EWOULDBLOCK); error = xfs_growfs_log_private(mp, in); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index f18b9b28179..edf12c7b834 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1876,6 +1876,8 @@ xfs_growfs_rt( /* * Initial error checking. */ + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || (nrblocks = in->newblocks) <= sbp->sb_rblocks || (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) -- cgit v1.2.3 From ffae263a640b736a7206a0d7bd14ab44eb58cd28 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:07 -0600 Subject: [XFS] Move compat ioctl structs & numbers into xfs_ioctl32.h This makes the c file less cluttered and a bit more readable. Consistently name the ioctl number macros with "_32" and the compatibility stuctures with "_compat." Rename the helpers which simply copy in the arg with "_copyin" for easy identification. Finally, for a few of the existing helpers, modify them so that they directly call the native ioctl helper after userspace argument fixup. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 140 ++------------------------------------- fs/xfs/linux-2.6/xfs_ioctl32.h | 145 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_fs.h | 4 -- 3 files changed, 150 insertions(+), 139 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 6ce9c9c7cdd..620bb2d6d26 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -48,36 +48,12 @@ #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) -#define BROKEN_X86_ALIGNMENT -#define _PACKED __attribute__((packed)) -/* on ia32 l_start is on a 32-bit boundary */ -typedef struct xfs_flock64_32 { - __s16 l_type; - __s16 l_whence; - __s64 l_start __attribute__((packed)); - /* len == 0 means until end of file */ - __s64 l_len __attribute__((packed)); - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} xfs_flock64_32_t; - -#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) -#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32) -#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) -#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) -#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) -#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) -#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) -#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) - -/* just account for different alignment */ +#ifdef BROKEN_X86_ALIGNMENT STATIC unsigned long xfs_ioctl32_flock( unsigned long arg) { - xfs_flock64_32_t __user *p32 = (void __user *)arg; + compat_xfs_flock64_t __user *p32 = (void __user *)arg; xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || @@ -92,32 +68,6 @@ xfs_ioctl32_flock( return (unsigned long)p; } -typedef struct compat_xfs_fsop_geom_v1 { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ -} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; - -#define XFS_IOC_FSGEOMETRY_V1_32 \ - _IOR ('X', 100, struct compat_xfs_fsop_geom_v1) - STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) { compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; @@ -128,12 +78,6 @@ STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) return (unsigned long)p; } -typedef struct compat_xfs_inogrp { - __u64 xi_startino; /* starting inode number */ - __s32 xi_alloccount; /* # bits set in allocmask */ - __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; - STATIC int xfs_inumbers_fmt_compat( void __user *ubuffer, const xfs_inogrp_t *buffer, @@ -154,19 +98,11 @@ STATIC int xfs_inumbers_fmt_compat( } #else - #define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#define _PACKED - #endif /* XFS_IOC_FSBULKSTAT and friends */ -typedef struct compat_xfs_bstime { - __s32 tv_sec; /* seconds */ - __s32 tv_nsec; /* and nanoseconds */ -} compat_xfs_bstime_t; - STATIC int xfs_bstime_store_compat( compat_xfs_bstime_t __user *p32, const xfs_bstime_t *p) @@ -180,30 +116,6 @@ STATIC int xfs_bstime_store_compat( return 0; } -typedef struct compat_xfs_bstat { - __u64 bs_ino; /* inode number */ - __u16 bs_mode; /* type and mode */ - __u16 bs_nlink; /* number of links */ - __u32 bs_uid; /* user id */ - __u32 bs_gid; /* group id */ - __u32 bs_rdev; /* device value */ - __s32 bs_blksize; /* block size */ - __s64 bs_size; /* file size */ - compat_xfs_bstime_t bs_atime; /* access time */ - compat_xfs_bstime_t bs_mtime; /* modify time */ - compat_xfs_bstime_t bs_ctime; /* inode change time */ - int64_t bs_blocks; /* number of blocks */ - __u32 bs_xflags; /* extended flags */ - __s32 bs_extsize; /* extent size */ - __s32 bs_extents; /* number of extents */ - __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ - __u32 bs_dmevmask; /* DMIG event mask */ - __u16 bs_dmstate; /* DMIG state info */ - __u16 bs_aextents; /* attribute number of extents */ -} _PACKED compat_xfs_bstat_t; - STATIC int xfs_bulkstat_one_fmt_compat( void __user *ubuffer, const xfs_bstat_t *buffer) @@ -234,22 +146,6 @@ STATIC int xfs_bulkstat_one_fmt_compat( return sizeof(*p32); } - - -typedef struct compat_xfs_fsop_bulkreq { - compat_uptr_t lastip; /* last inode # pointer */ - __s32 icount; /* count of entries in buffer */ - compat_uptr_t ubuffer; /* user buffer for inode desc. */ - compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; - -#define XFS_IOC_FSBULKSTAT_32 \ - _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ - _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSINUMBERS_32 \ - _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) - /* copied from xfs_ioctl.c */ STATIC int xfs_ioc_bulkstat_compat( @@ -320,29 +216,6 @@ xfs_ioc_bulkstat_compat( return 0; } - - -typedef struct compat_xfs_fsop_handlereq { - __u32 fd; /* fd for FD_TO_HANDLE */ - compat_uptr_t path; /* user pathname */ - __u32 oflags; /* open flags */ - compat_uptr_t ihandle; /* user supplied handle */ - __u32 ihandlen; /* user supplied length */ - compat_uptr_t ohandle; /* user buffer for handle */ - compat_uptr_t ohandlen; /* user buffer length */ -} compat_xfs_fsop_handlereq_t; - -#define XFS_IOC_PATH_TO_FSHANDLE_32 \ - _IOWR('X', 104, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_PATH_TO_HANDLE_32 \ - _IOWR('X', 105, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_FD_TO_HANDLE_32 \ - _IOWR('X', 106, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_OPEN_BY_HANDLE_32 \ - _IOWR('X', 107, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_READLINK_BY_HANDLE_32 \ - _IOWR('X', 108, struct compat_xfs_fsop_handlereq) - STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) { compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; @@ -365,7 +238,6 @@ STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) return (unsigned long)p; } - STATIC long xfs_compat_ioctl( int mode, @@ -404,9 +276,9 @@ xfs_compat_ioctl( case XFS_IOC_ERROR_CLEARALL: break; - case XFS_IOC32_GETXFLAGS: - case XFS_IOC32_SETXFLAGS: - case XFS_IOC32_GETVERSION: + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); break; #ifdef BROKEN_X86_ALIGNMENT @@ -426,7 +298,6 @@ xfs_compat_ioctl( arg = xfs_ioctl32_geom_v1(arg); cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1); break; - #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: @@ -464,7 +335,6 @@ xfs_compat_ioctl( error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); - return error; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 02de6e62ee3..003a10e695b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -18,7 +18,152 @@ #ifndef __XFS_IOCTL32_H__ #define __XFS_IOCTL32_H__ +#include + extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); +/* + * on 32-bit arches, ioctl argument structures may have different sizes + * and/or alignment. We define compat structures which match the + * 32-bit sizes/alignments here, and their associated ioctl numbers. + * + * xfs_ioctl32.c contains routines to copy these structures in and out. + */ + +/* stock kernel-level ioctls we support */ +#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS +#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS +#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION + +/* + * On intel, even if sizes match, alignment and/or padding may differ. + */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#define BROKEN_X86_ALIGNMENT +#define __compat_packed __attribute__((packed)) +#else +#define __compat_packed +#endif + +typedef struct compat_xfs_bstime { + compat_time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid; /* project id */ + unsigned char bs_pad[14]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} __compat_packed compat_xfs_bstat_t; + +typedef struct compat_xfs_fsop_bulkreq { + compat_uptr_t lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + compat_uptr_t ubuffer; /* user buffer for inode desc. */ + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; + +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +typedef struct compat_xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + compat_uptr_t path; /* user pathname */ + __u32 oflags; /* open flags */ + compat_uptr_t ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + compat_uptr_t ohandle; /* user buffer for handle */ + compat_uptr_t ohandlen; /* user buffer length */ +} compat_xfs_fsop_handlereq_t; + +#define XFS_IOC_PATH_TO_FSHANDLE_32 \ + _IOWR('X', 104, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE_32 \ + _IOWR('X', 105, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE_32 \ + _IOWR('X', 106, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE_32 \ + _IOWR('X', 107, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE_32 \ + _IOWR('X', 108, struct compat_xfs_fsop_handlereq) + +#ifdef BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct compat_xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} compat_xfs_flock64_t; + +#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) +#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) +#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) +#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) + +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR('X', 100, struct compat_xfs_fsop_geom_v1) + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +#endif /* BROKEN_X86_ALIGNMENT */ + #endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 4ae03bae992..589c41c3844 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -418,10 +418,6 @@ typedef struct xfs_handle { #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS #define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS #define XFS_IOC_GETVERSION FS_IOC_GETVERSION -/* 32-bit compat counterparts */ -#define XFS_IOC32_GETXFLAGS FS_IOC32_GETFLAGS -#define XFS_IOC32_SETXFLAGS FS_IOC32_SETFLAGS -#define XFS_IOC32_GETVERSION FS_IOC32_GETVERSION /* * ioctl commands that replace IRIX fcntl()'s -- cgit v1.2.3 From d5547f9feea459dfc9e7313bd1d561394e2c129f Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:08 -0600 Subject: [XFS] Clean up some existing compat ioctl calls Create a new xfs_ioctl.h file which has prototypes for ioctl helpers that may be called in compat mode. Change several compat ioctl cases which are IOW to simply copy in the userspace argument, then call the common ioctl helper. This also fixes xfs_compat_ioc_fsgeometry_v1(), which had it backwards before; it copied in an (empty) arg, then copied out the native result, which probably corrupted userspace. It should be translating on the copyout. Also, a bit of formatting cleanup for consistency, and conversion of all error returns to use XFS_ERROR(). Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl.c | 8 +- fs/xfs/linux-2.6/xfs_ioctl.h | 47 ++++++++++ fs/xfs/linux-2.6/xfs_ioctl32.c | 207 ++++++++++++++++++++++++----------------- 3 files changed, 171 insertions(+), 91 deletions(-) create mode 100644 fs/xfs/linux-2.6/xfs_ioctl.h (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 03c55b8fa37..498d3e15843 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -68,7 +68,7 @@ * XFS_IOC_PATH_TO_HANDLE * returns full handle for a path */ -STATIC int +int xfs_find_handle( unsigned int cmd, xfs_fsop_handlereq_t *hreq) @@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq( return 0; } -STATIC int +int xfs_open_by_handle( xfs_mount_t *mp, xfs_fsop_handlereq_t *hreq, @@ -355,7 +355,7 @@ do_readlink( } -STATIC int +int xfs_readlink_by_handle( xfs_mount_t *mp, xfs_fsop_handlereq_t *hreq, @@ -651,7 +651,7 @@ xfs_attrmulti_by_handle( return -error; } -STATIC int +int xfs_ioc_space( struct xfs_inode *ip, struct inode *inode, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h new file mode 100644 index 00000000000..2df849f49a5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL_H__ +#define __XFS_IOCTL_H__ + +extern int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf); + +extern int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_open_by_handle( + xfs_mount_t *mp, + xfs_fsop_handlereq_t *hreq, + struct file *parfilp, + struct inode *parinode); + +extern int +xfs_readlink_by_handle( + xfs_mount_t *mp, + xfs_fsop_handlereq_t *hreq, + struct inode *parinode); +#endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 620bb2d6d26..35edc054fb9 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -43,55 +43,62 @@ #include "xfs_error.h" #include "xfs_dfrag.h" #include "xfs_vnodeops.h" +#include "xfs_fsops.h" +#include "xfs_ioctl.h" #include "xfs_ioctl32.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) #ifdef BROKEN_X86_ALIGNMENT -STATIC unsigned long -xfs_ioctl32_flock( - unsigned long arg) +STATIC int +xfs_compat_flock64_copyin( + xfs_flock64_t *bf, + compat_xfs_flock64_t __user *arg32) { - compat_xfs_flock64_t __user *p32 = (void __user *)arg; - xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); - - if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || - copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || - copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || - copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || - copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || - copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || - copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) - return -EFAULT; - - return (unsigned long)p; + if (get_user(bf->l_type, &arg32->l_type) || + get_user(bf->l_whence, &arg32->l_whence) || + get_user(bf->l_start, &arg32->l_start) || + get_user(bf->l_len, &arg32->l_len) || + get_user(bf->l_sysid, &arg32->l_sysid) || + get_user(bf->l_pid, &arg32->l_pid) || + copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) + return -XFS_ERROR(EFAULT); + return 0; } -STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) +STATIC int +xfs_compat_ioc_fsgeometry_v1( + struct xfs_mount *mp, + compat_xfs_fsop_geom_v1_t __user *arg32) { - compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; - xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); + xfs_fsop_geom_t fsgeo; + int error; - if (copy_in_user(p, p32, sizeof(*p32))) - return -EFAULT; - return (unsigned long)p; + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + /* The 32-bit variant simply has some padding at the end */ + if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) + return -XFS_ERROR(EFAULT); + return 0; } -STATIC int xfs_inumbers_fmt_compat( - void __user *ubuffer, - const xfs_inogrp_t *buffer, - long count, - long *written) +STATIC int +xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) { - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; + compat_xfs_inogrp_t __user *p32 = ubuffer; + long i; for (i = 0; i < count; i++) { if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -EFAULT; + return -XFS_ERROR(EFAULT); } *written = count * sizeof(*p32); return 0; @@ -103,24 +110,26 @@ STATIC int xfs_inumbers_fmt_compat( /* XFS_IOC_FSBULKSTAT and friends */ -STATIC int xfs_bstime_store_compat( - compat_xfs_bstime_t __user *p32, - const xfs_bstime_t *p) +STATIC int +xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) { - __s32 sec32; + __s32 sec32; sec32 = p->tv_sec; if (put_user(sec32, &p32->tv_sec) || put_user(p->tv_nsec, &p32->tv_nsec)) - return -EFAULT; + return -XFS_ERROR(EFAULT); return 0; } -STATIC int xfs_bulkstat_one_fmt_compat( +STATIC int +xfs_bulkstat_one_fmt_compat( void __user *ubuffer, const xfs_bstat_t *buffer) { - compat_xfs_bstat_t __user *p32 = ubuffer; + compat_xfs_bstat_t __user *p32 = ubuffer; if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -142,7 +151,7 @@ STATIC int xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return -EFAULT; + return -XFS_ERROR(EFAULT); return sizeof(*p32); } @@ -165,20 +174,20 @@ xfs_ioc_bulkstat_compat( /* should be called again (unused here, but used in dmapi) */ if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return -XFS_ERROR(EPERM); if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); if (get_user(addr, &p32->lastip)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.lastip = compat_ptr(addr); if (get_user(bulkreq.icount, &p32->icount) || get_user(addr, &p32->ubuffer)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.ubuffer = compat_ptr(addr); if (get_user(addr, &p32->ocount)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.ocount = compat_ptr(addr); if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) @@ -216,38 +225,40 @@ xfs_ioc_bulkstat_compat( return 0; } -STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) +STATIC int +xfs_compat_handlereq_copyin( + xfs_fsop_handlereq_t *hreq, + compat_xfs_fsop_handlereq_t __user *arg32) { - compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; - xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p)); - u32 addr; - - if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) || - get_user(addr, &p32->path) || - put_user(compat_ptr(addr), &p->path) || - copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) || - get_user(addr, &p32->ihandle) || - put_user(compat_ptr(addr), &p->ihandle) || - copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) || - get_user(addr, &p32->ohandle) || - put_user(compat_ptr(addr), &p->ohandle) || - get_user(addr, &p32->ohandlen) || - put_user(compat_ptr(addr), &p->ohandlen)) - return -EFAULT; - - return (unsigned long)p; + compat_xfs_fsop_handlereq_t hreq32; + + if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + hreq->fd = hreq32.fd; + hreq->path = compat_ptr(hreq32.path); + hreq->oflags = hreq32.oflags; + hreq->ihandle = compat_ptr(hreq32.ihandle); + hreq->ihandlen = hreq32.ihandlen; + hreq->ohandle = compat_ptr(hreq32.ohandle); + hreq->ohandlen = compat_ptr(hreq32.ohandlen); + + return 0; } STATIC long xfs_compat_ioctl( - int mode, - struct file *file, + xfs_inode_t *ip, + struct file *filp, + int ioflags, unsigned cmd, - unsigned long arg) + void __user *arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; + xfs_mount_t *mp = ip->i_mount; int error; + xfs_itrace_entry(XFS_I(inode)); switch (cmd) { case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: @@ -290,14 +301,16 @@ xfs_compat_ioctl( case XFS_IOC_RESVSP_32: case XFS_IOC_UNRESVSP_32: case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: - arg = xfs_ioctl32_flock(arg); + case XFS_IOC_UNRESVSP64_32: { + struct xfs_flock64 bf; + + if (xfs_compat_flock64_copyin(&bf, arg)) + return -XFS_ERROR(EFAULT); cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - break; + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } case XFS_IOC_FSGEOMETRY_V1_32: - arg = xfs_ioctl32_geom_v1(arg); - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1); - break; + return xfs_compat_ioc_fsgeometry_v1(mp, arg); #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: @@ -323,35 +336,55 @@ xfs_compat_ioctl( cmd, (void __user*)arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: - case XFS_IOC_PATH_TO_FSHANDLE_32: - case XFS_IOC_OPEN_BY_HANDLE_32: - case XFS_IOC_READLINK_BY_HANDLE_32: - arg = xfs_ioctl32_fshandle(arg); + case XFS_IOC_PATH_TO_FSHANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); - break; + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(mp, &hreq, filp, inode); + } + case XFS_IOC_READLINK_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(mp, &hreq, inode); + } default: - return -ENOIOCTLCMD; + return -XFS_ERROR(ENOIOCTLCMD); } - error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); + error = xfs_ioctl(ip, filp, ioflags, cmd, arg); return error; } long xfs_file_compat_ioctl( - struct file *file, - unsigned cmd, - unsigned long arg) + struct file *filp, + unsigned int cmd, + unsigned long p) { - return xfs_compat_ioctl(0, file, cmd, arg); + struct inode *inode = filp->f_path.dentry->d_inode; + + return xfs_compat_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); } long xfs_file_compat_invis_ioctl( - struct file *file, - unsigned cmd, - unsigned long arg) + struct file *filp, + unsigned int cmd, + unsigned long p) { - return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); + struct inode *inode = filp->f_path.dentry->d_inode; + + return xfs_compat_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, + (void __user *)p); } -- cgit v1.2.3 From e94fc4a43e5c39f689e83caf6d2f0939081f5e6b Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:09 -0600 Subject: [XFS] Add compat handlers for swapext ioctl The big hitter here was the bstat field, which contains different sized time_t on 32 vs. 64 bit. Add a copyin function to translate the 32-bit arg to 64-bit, and call the swapext ioctl helper. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 61 ++++++++++++++++++++++++++++++++++++++---- fs/xfs/linux-2.6/xfs_ioctl32.h | 13 +++++++++ 2 files changed, 69 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 35edc054fb9..132f3dd2c7e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -108,6 +108,50 @@ xfs_inumbers_fmt_compat( #define xfs_inumbers_fmt_compat xfs_inumbers_fmt #endif +STATIC int +xfs_ioctl32_bstime_copyin( + xfs_bstime_t *bstime, + compat_xfs_bstime_t __user *bstime32) +{ + compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ + + if (get_user(sec32, &bstime32->tv_sec) || + get_user(bstime->tv_nsec, &bstime32->tv_nsec)) + return -XFS_ERROR(EFAULT); + bstime->tv_sec = sec32; + return 0; +} + +/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +STATIC int +xfs_ioctl32_bstat_copyin( + xfs_bstat_t *bstat, + compat_xfs_bstat_t __user *bstat32) +{ + if (get_user(bstat->bs_ino, &bstat32->bs_ino) || + get_user(bstat->bs_mode, &bstat32->bs_mode) || + get_user(bstat->bs_nlink, &bstat32->bs_nlink) || + get_user(bstat->bs_uid, &bstat32->bs_uid) || + get_user(bstat->bs_gid, &bstat32->bs_gid) || + get_user(bstat->bs_rdev, &bstat32->bs_rdev) || + get_user(bstat->bs_blksize, &bstat32->bs_blksize) || + get_user(bstat->bs_size, &bstat32->bs_size) || + xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || + get_user(bstat->bs_blocks, &bstat32->bs_size) || + get_user(bstat->bs_xflags, &bstat32->bs_size) || + get_user(bstat->bs_extsize, &bstat32->bs_extsize) || + get_user(bstat->bs_extents, &bstat32->bs_extents) || + get_user(bstat->bs_gen, &bstat32->bs_gen) || + get_user(bstat->bs_projid, &bstat32->bs_projid) || + get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || + get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || + get_user(bstat->bs_aextents, &bstat32->bs_aextents)) + return -XFS_ERROR(EFAULT); + return 0; +} + /* XFS_IOC_FSBULKSTAT and friends */ STATIC int @@ -292,6 +336,18 @@ xfs_compat_ioctl( case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); break; + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then grab bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(xfs_swapext_t, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } #ifdef BROKEN_X86_ALIGNMENT /* xfs_flock_t has wrong u32 vs u64 alignment */ case XFS_IOC_ALLOCSP_32: @@ -322,11 +378,6 @@ xfs_compat_ioctl( case XFS_IOC_UNRESVSP64: case XFS_IOC_FSGEOMETRY_V1: break; - - /* xfs_bstat_t still has wrong u32 vs u64 alignment */ - case XFS_IOC_SWAPEXT: - break; - #endif case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 003a10e695b..bf08ab12a6f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -110,6 +110,19 @@ typedef struct compat_xfs_fsop_handlereq { #define XFS_IOC_READLINK_BY_HANDLE_32 \ _IOWR('X', 108, struct compat_xfs_fsop_handlereq) +/* The bstat field in the swapext struct needs translation */ +typedef struct compat_xfs_swapext { + __int64_t sx_version; /* version */ + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* leng from offset */ + char sx_pad[16]; /* pad space, unused */ + compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ +} __compat_packed compat_xfs_swapext_t; + +#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v1.2.3 From 471d59103167c84f17b9bcfee22ed10b44ff206e Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:10 -0600 Subject: [XFS] Add compat handlers for data & rt growfs ioctls The args for XFS_IOC_FSGROWFSDATA and XFS_IOC_FSGROWFSRTA have padding on the end on intel, so add arg copyin functions, and then just call the growfs ioctl helpers. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_ioctl32.h | 14 ++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 132f3dd2c7e..d1ac5d5c009 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -44,6 +44,8 @@ #include "xfs_dfrag.h" #include "xfs_vnodeops.h" #include "xfs_fsops.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" #include "xfs_ioctl.h" #include "xfs_ioctl32.h" @@ -84,6 +86,28 @@ xfs_compat_ioc_fsgeometry_v1( return 0; } +STATIC int +xfs_compat_growfs_data_copyin( + struct xfs_growfs_data *in, + compat_xfs_growfs_data_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->imaxpct, &arg32->imaxpct)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_rt_copyin( + struct xfs_growfs_rt *in, + compat_xfs_growfs_rt_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->extsize, &arg32->extsize)) + return -XFS_ERROR(EFAULT); + return 0; +} + STATIC int xfs_inumbers_fmt_compat( void __user *ubuffer, @@ -367,6 +391,22 @@ xfs_compat_ioctl( } case XFS_IOC_FSGEOMETRY_V1_32: return xfs_compat_ioc_fsgeometry_v1(mp, arg); + case XFS_IOC_FSGROWFSDATA_32: { + struct xfs_growfs_data in; + + if (xfs_compat_growfs_data_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_data(mp, &in); + return -error; + } + case XFS_IOC_FSGROWFSRT_32: { + struct xfs_growfs_rt in; + + if (xfs_compat_growfs_rt_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_rt(mp, &in); + return -error; + } #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index bf08ab12a6f..8b2a12a8e96 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -177,6 +177,20 @@ typedef struct compat_xfs_inogrp { __u64 xi_allocmask; /* mask of allocated inodes */ } __attribute__((packed)) compat_xfs_inogrp_t; +/* These growfs input structures have padding on the end, so must translate */ +typedef struct compat_xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} __attribute__((packed)) compat_xfs_growfs_data_t; + +typedef struct compat_xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} __attribute__((packed)) compat_xfs_growfs_rt_t; + +#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) +#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) + #endif /* BROKEN_X86_ALIGNMENT */ #endif /* __XFS_IOCTL32_H__ */ -- cgit v1.2.3 From 2ee4fa5cb716eba104a4ef8efe159e1007a2aef6 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:11 -0600 Subject: [XFS] Make the bulkstat_one compat ioctl handling more sane Currently the compat formatter was handled by passing in "private_data" for the xfs_bulkstat_one formatter, which was really just another formatter... IMHO this got confusing. Instead, just make a new xfs_bulkstat_one_compat formatter for xfs_bulkstat, and call it via a wrapper. Also, don't translate the ioctl nrs into their native counterparts, that just clouds the issue; we're in a compat handler anyway, just switch on the 32-bit cmds. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 36 ++++++++++++++++++++++++------------ fs/xfs/xfs_itable.c | 24 ++++++++++++++++++++---- fs/xfs/xfs_itable.h | 12 ++++++++++++ 3 files changed, 56 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index d1ac5d5c009..a97022f2d9b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -223,14 +223,30 @@ xfs_bulkstat_one_fmt_compat( return sizeof(*p32); } +STATIC int +xfs_bulkstat_one_compat( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting bno of inode cluster */ + int *ubused, /* bytes used by me */ + void *dibuff, /* on-disk inode buffer */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt_compat, bno, + ubused, dibuff, stat); +} + /* copied from xfs_ioctl.c */ STATIC int -xfs_ioc_bulkstat_compat( - xfs_mount_t *mp, - unsigned int cmd, - void __user *arg) +xfs_compat_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + compat_xfs_fsop_bulkreq_t __user *p32) { - compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg; u32 addr; xfs_fsop_bulkreq_t bulkreq; int count; /* # of records returned */ @@ -267,14 +283,12 @@ xfs_ioc_bulkstat_compat( if (bulkreq.ubuffer == NULL) return -XFS_ERROR(EINVAL); - if (cmd == XFS_IOC_FSINUMBERS) + if (cmd == XFS_IOC_FSINUMBERS_32) error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt_compat); else { - /* declare a var to get a warning in case the type changes */ - bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat; error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one, formatter, + xfs_bulkstat_one_compat, NULL, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, BULKSTAT_FG_QUICK, &done); } @@ -422,9 +436,7 @@ xfs_compat_ioctl( case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); - return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount, - cmd, (void __user*)arg); + return xfs_compat_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: case XFS_IOC_PATH_TO_FSHANDLE_32: { diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index d4c0de86012..7bd49b87160 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -202,13 +202,13 @@ xfs_bulkstat_one_fmt( * Return stat information for one inode. * Return 0 if ok, else errno. */ -int /* error status */ -xfs_bulkstat_one( +int /* error status */ +xfs_bulkstat_one_int( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ + bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ void *dibuff, /* on-disk inode buffer */ @@ -217,7 +217,6 @@ xfs_bulkstat_one( xfs_bstat_t *buf; /* return buffer */ int error = 0; /* error value */ xfs_dinode_t *dip; /* dinode inode pointer */ - bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt; dip = (xfs_dinode_t *)dibuff; *stat = BULKSTAT_RV_NOTHING; @@ -255,6 +254,23 @@ xfs_bulkstat_one( return error; } +int +xfs_bulkstat_one( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting bno of inode cluster */ + int *ubused, /* bytes used by me */ + void *dibuff, /* on-disk inode buffer */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt, bno, + ubused, dibuff, stat); +} + /* * Test to see whether we can use the ondisk inode directly, based * on the given bulkstat flags, filling in dipp accordingly. diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index a1f18fce9b7..e210a4c0656 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -73,6 +73,18 @@ typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ void __user *ubuffer, /* buffer to write to */ const xfs_bstat_t *buffer); /* buffer to read from */ +int +xfs_bulkstat_one_int( + xfs_mount_t *mp, + xfs_ino_t ino, + void __user *buffer, + int ubsize, + bulkstat_one_fmt_pf formatter, + xfs_daddr_t bno, + int *ubused, + void *dibuff, + int *stat); + int xfs_bulkstat_one( xfs_mount_t *mp, -- cgit v1.2.3 From 65fbaf2489c667bf79ae1f20403f30c66568d445 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:12 -0600 Subject: [XFS] Fix xfs_bulkstat_one size checks & error handling The 32-bit xfs_blkstat_one handler was failing because a size check checked whether the remaining (32-bit) user buffer was less than the (64-bit) bulkstat buffer, and failed with ENOMEM if so. Move this check into the respective handlers so that they check the correct sizes. Also, the formatters were returning negative errors or positive bytes copied; this was odd in the positive error value world of xfs, and handled wrong by at least some of the callers, which treated the bytes returned as an error value. Move the bytes-used assignment into the formatters. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 44 +++++++++++++++++++++++++----------------- fs/xfs/xfs_itable.c | 21 ++++++++++---------- fs/xfs/xfs_itable.h | 2 ++ 3 files changed, 39 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index a97022f2d9b..2d336062831 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -192,35 +192,43 @@ xfs_bstime_store_compat( return 0; } +/* Return 0 on success or positive error (to xfs_bulkstat()) */ STATIC int xfs_bulkstat_one_fmt_compat( void __user *ubuffer, + int ubsize, + int *ubused, const xfs_bstat_t *buffer) { compat_xfs_bstat_t __user *p32 = ubuffer; - if (put_user(buffer->bs_ino, &p32->bs_ino) || - put_user(buffer->bs_mode, &p32->bs_mode) || - put_user(buffer->bs_nlink, &p32->bs_nlink) || - put_user(buffer->bs_uid, &p32->bs_uid) || - put_user(buffer->bs_gid, &p32->bs_gid) || - put_user(buffer->bs_rdev, &p32->bs_rdev) || - put_user(buffer->bs_blksize, &p32->bs_blksize) || - put_user(buffer->bs_size, &p32->bs_size) || + if (ubsize < sizeof(*p32)) + return XFS_ERROR(ENOMEM); + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || - put_user(buffer->bs_blocks, &p32->bs_blocks) || - put_user(buffer->bs_xflags, &p32->bs_xflags) || - put_user(buffer->bs_extsize, &p32->bs_extsize) || - put_user(buffer->bs_extents, &p32->bs_extents) || - put_user(buffer->bs_gen, &p32->bs_gen) || - put_user(buffer->bs_projid, &p32->bs_projid) || - put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || - put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return -XFS_ERROR(EFAULT); - return sizeof(*p32); + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*p32); + return 0; } STATIC int diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7bd49b87160..e19d0a8d561 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -188,14 +188,21 @@ xfs_bulkstat_one_dinode( } } +/* Return 0 on success or positive error */ STATIC int xfs_bulkstat_one_fmt( void __user *ubuffer, + int ubsize, + int *ubused, const xfs_bstat_t *buffer) { + if (ubsize < sizeof(*buffer)) + return XFS_ERROR(ENOMEM); if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return -EFAULT; - return sizeof(*buffer); + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*buffer); + return 0; } /* @@ -223,8 +230,6 @@ xfs_bulkstat_one_int( if (!buffer || xfs_internal_inum(mp, ino)) return XFS_ERROR(EINVAL); - if (ubsize < sizeof(*buf)) - return XFS_ERROR(ENOMEM); buf = kmem_alloc(sizeof(*buf), KM_SLEEP); @@ -239,15 +244,11 @@ xfs_bulkstat_one_int( xfs_bulkstat_one_dinode(mp, ino, dip, buf); } - error = formatter(buffer, buf); - if (error < 0) { - error = EFAULT; + error = formatter(buffer, ubsize, ubused, buf); + if (error) goto out_free; - } *stat = BULKSTAT_RV_DIDONE; - if (ubused) - *ubused = error; out_free: kmem_free(buf); diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index e210a4c0656..1fb04e7deb6 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -71,6 +71,8 @@ xfs_bulkstat_single( typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ void __user *ubuffer, /* buffer to write to */ + int ubsize, /* remaining user buffer sz */ + int *ubused, /* bytes used by formatter */ const xfs_bstat_t *buffer); /* buffer to read from */ int -- cgit v1.2.3 From af819d27637119105213433881f158931e29620b Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:13 -0600 Subject: [XFS] Fix compat XFS_IOC_FSBULKSTAT_SINGLE ioctl The XFS_IOC_FSBULKSTAT_SINGLE ioctl passes in the desired inode number, while XFS_IOC_FSBULKSTAT passes in the previous/last-stat'd inode number. The compat handler wasn't differentiating these, so when a XFS_IOC_FSBULKSTAT_SINGLE request for inode 128 was sent in, stat information for 131 was sent out. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 2d336062831..cc1fe96efab 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -291,15 +291,22 @@ xfs_compat_ioc_bulkstat( if (bulkreq.ubuffer == NULL) return -XFS_ERROR(EINVAL); - if (cmd == XFS_IOC_FSINUMBERS_32) + if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt_compat); - else { + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { + int res; + + error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, + sizeof(compat_xfs_bstat_t), + NULL, 0, NULL, NULL, &res); + } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one_compat, NULL, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, BULKSTAT_FG_QUICK, &done); - } + } else + error = XFS_ERROR(EINVAL); if (error) return -error; -- cgit v1.2.3 From ebeecd2b04645a4b79e1bc00d69cf4f98e03a684 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:14 -0600 Subject: [XFS] Hook up compat XFS_IOC_ATTRLIST_BY_HANDLE ioctl handler Add a compat handler for XFS_IOC_ATTRLIST_BY_HANDLE Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 136 ++++++++++++++++++++++++++++++++++++++++- fs/xfs/linux-2.6/xfs_ioctl32.h | 12 ++++ 2 files changed, 147 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index cc1fe96efab..e7eb0d7e0d5 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -46,6 +46,7 @@ #include "xfs_fsops.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" +#include "xfs_attr.h" #include "xfs_ioctl.h" #include "xfs_ioctl32.h" @@ -343,6 +344,138 @@ xfs_compat_handlereq_copyin( return 0; } +/* + * Convert userspace handle data into inode. + * + * We use the fact that all the fsop_handlereq ioctl calls have a data + * structure argument whose first component is always a xfs_fsop_handlereq_t, + * so we can pass that sub structure into this handy, shared routine. + * + * If no error, caller must always iput the returned inode. + */ +STATIC int +xfs_vget_fsop_handlereq_compat( + xfs_mount_t *mp, + struct inode *parinode, /* parent inode pointer */ + compat_xfs_fsop_handlereq_t *hreq, + struct inode **inode) +{ + void __user *hanp; + size_t hlen; + xfs_fid_t *xfid; + xfs_handle_t *handlep; + xfs_handle_t handle; + xfs_inode_t *ip; + xfs_ino_t ino; + __u32 igen; + int error; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parinode->i_mode)) + return XFS_ERROR(ENOTDIR); + + hanp = compat_ptr(hreq->ihandle); + hlen = hreq->ihandlen; + handlep = &handle; + + if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) + return XFS_ERROR(EINVAL); + if (copy_from_user(handlep, hanp, hlen)) + return XFS_ERROR(EFAULT); + if (hlen < sizeof(*handlep)) + memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); + if (hlen > sizeof(handlep->ha_fsid)) { + if (handlep->ha_fid.fid_len != + (hlen - sizeof(handlep->ha_fsid) - + sizeof(handlep->ha_fid.fid_len)) || + handlep->ha_fid.fid_pad) + return XFS_ERROR(EINVAL); + } + + /* + * Crack the handle, obtain the inode # & generation # + */ + xfid = (struct xfs_fid *)&handlep->ha_fid; + if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) { + ino = xfid->fid_ino; + igen = xfid->fid_gen; + } else { + return XFS_ERROR(EINVAL); + } + + /* + * Get the XFS inode, building a Linux inode to go with it. + */ + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); + if (error) + return error; + if (ip == NULL) + return XFS_ERROR(EIO); + if (ip->i_d.di_gen != igen) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return XFS_ERROR(ENOENT); + } + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + *inode = VFS_I(ip); + return 0; +} + +STATIC int +xfs_compat_attrlist_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t al_hreq; + struct inode *inode; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, + sizeof(compat_xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq, + &inode); + if (error) + goto out; + + kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_vn_rele; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_vn_rele: + iput(inode); + out: + return -error; +} + STATIC long xfs_compat_ioctl( xfs_inode_t *ip, @@ -368,7 +501,6 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAPX: /* not handled case XFS_IOC_FSSETDM_BY_HANDLE: - case XFS_IOC_ATTRLIST_BY_HANDLE: case XFS_IOC_ATTRMULTI_BY_HANDLE: */ case XFS_IOC_FSCOUNTS: @@ -476,6 +608,8 @@ xfs_compat_ioctl( return -XFS_ERROR(EFAULT); return xfs_readlink_by_handle(mp, &hreq, inode); } + case XFS_IOC_ATTRLIST_BY_HANDLE_32: + return xfs_compat_attrlist_by_handle(mp, arg, inode); default: return -XFS_ERROR(ENOIOCTLCMD); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 8b2a12a8e96..bbf3a2cd8a7 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -123,6 +123,18 @@ typedef struct compat_xfs_swapext { #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) +typedef struct compat_xfs_fsop_attrlist_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ + __u32 flags; /* which namespace to use */ + __u32 buflen; /* length of buffer supplied */ + compat_uptr_t buffer; /* returned names */ +} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; + +/* Note: actually this is read/write */ +#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ + _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v1.2.3 From 28750975ace79c547407a84d3969cbed516be8f8 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:15 -0600 Subject: [XFS] Hook up compat XFS_IOC_ATTRMULTI_BY_HANDLE ioctl handler Add a compat handler for XFS_IOC_ATTRMULTI_BY_HANDLE Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl.c | 6 +-- fs/xfs/linux-2.6/xfs_ioctl.h | 23 +++++++++++ fs/xfs/linux-2.6/xfs_ioctl32.c | 94 ++++++++++++++++++++++++++++++++++++++++-- fs/xfs/linux-2.6/xfs_ioctl32.h | 20 +++++++++ 4 files changed, 136 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 498d3e15843..c8f1e632ba9 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -490,7 +490,7 @@ xfs_attrlist_by_handle( return -error; } -STATIC int +int xfs_attrmulti_attr_get( struct inode *inode, char *name, @@ -519,7 +519,7 @@ xfs_attrmulti_attr_get( return error; } -STATIC int +int xfs_attrmulti_attr_set( struct inode *inode, char *name, @@ -549,7 +549,7 @@ xfs_attrmulti_attr_set( return error; } -STATIC int +int xfs_attrmulti_attr_remove( struct inode *inode, char *name, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index 2df849f49a5..f67dc69381e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -44,4 +44,27 @@ xfs_readlink_by_handle( xfs_mount_t *mp, xfs_fsop_handlereq_t *hreq, struct inode *parinode); + +extern int +xfs_attrmulti_attr_get( + struct inode *inode, + char *name, + char __user *ubuf, + __uint32_t *len, + __uint32_t flags); + +extern int + xfs_attrmulti_attr_set( + struct inode *inode, + char *name, + const char __user *ubuf, + __uint32_t len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_remove( + struct inode *inode, + char *name, + __uint32_t flags); + #endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index e7eb0d7e0d5..17993352aa6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -476,6 +476,93 @@ xfs_compat_attrlist_by_handle( return -error; } +STATIC int +xfs_compat_attrmulti_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + compat_xfs_attr_multiop_t *ops; + compat_xfs_fsop_attrmulti_handlereq_t am_hreq; + struct inode *inode; + unsigned int i, size; + char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, + sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq, + &inode); + if (error) + goto out; + + error = E2BIG; + size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_vn_rele; + + error = ENOMEM; + ops = kmalloc(size, GFP_KERNEL); + if (!ops) + goto out_vn_rele; + + error = EFAULT; + if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) + goto out_kfree_ops; + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user(attr_name, + compat_ptr(ops[i].am_attrname), + MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get(inode, + attr_name, + compat_ptr(ops[i].am_attrvalue), + &ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = xfs_attrmulti_attr_set(inode, + attr_name, + compat_ptr(ops[i].am_attrvalue), + ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = xfs_attrmulti_attr_remove(inode, + attr_name, ops[i].am_flags); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_vn_rele: + iput(inode); + out: + return -error; +} + STATIC long xfs_compat_ioctl( xfs_inode_t *ip, @@ -499,10 +586,7 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: -/* not handled - case XFS_IOC_FSSETDM_BY_HANDLE: - case XFS_IOC_ATTRMULTI_BY_HANDLE: -*/ +/* case XFS_IOC_FSSETDM_BY_HANDLE: not handled */ case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: @@ -610,6 +694,8 @@ xfs_compat_ioctl( } case XFS_IOC_ATTRLIST_BY_HANDLE_32: return xfs_compat_attrlist_by_handle(mp, arg, inode); + case XFS_IOC_ATTRMULTI_BY_HANDLE_32: + return xfs_compat_attrmulti_by_handle(mp, arg, inode); default: return -XFS_ERROR(ENOIOCTLCMD); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index bbf3a2cd8a7..785e7ec318c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -135,6 +135,26 @@ typedef struct compat_xfs_fsop_attrlist_handlereq { #define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) +/* am_opcodes defined in xfs_fs.h */ +typedef struct compat_xfs_attr_multiop { + __u32 am_opcode; + __s32 am_error; + compat_uptr_t am_attrname; + compat_uptr_t am_attrvalue; + __u32 am_length; + __u32 am_flags; +} compat_xfs_attr_multiop_t; + +typedef struct compat_xfs_fsop_attrmulti_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + __u32 opcount;/* count of following multiop */ + /* ptr to compat_xfs_attr_multiop */ + compat_uptr_t ops; /* attr_multi data */ +} compat_xfs_fsop_attrmulti_handlereq_t; + +#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ + _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v1.2.3 From 710d62aaaf17c841b8bdbc7a775f8910a7160248 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:16 -0600 Subject: [XFS] Hook up compat XFS_IOC_FSSETDM_BY_HANDLE ioctl handler Add a compat handler for XFS_IOC_FSSETDM_BY_HANDLE. I haven't tested this, lacking dmapi tools to do so (unless xfsqa magically gets this somehow?) Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 43 +++++++++++++++++++++++++++++++++++++++++- fs/xfs/linux-2.6/xfs_ioctl32.h | 9 +++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 17993352aa6..9234360910f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -563,6 +563,46 @@ xfs_compat_attrmulti_by_handle( return -error; } +STATIC int +xfs_compat_fssetdm_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + struct fsdmidata fsd; + compat_xfs_fsop_setdm_handlereq_t dmhreq; + struct inode *inode; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, + sizeof(compat_xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq, + &inode); + if (error) + return -error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + +out: + iput(inode); + return error; +} + STATIC long xfs_compat_ioctl( xfs_inode_t *ip, @@ -586,7 +626,6 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: -/* case XFS_IOC_FSSETDM_BY_HANDLE: not handled */ case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: @@ -696,6 +735,8 @@ xfs_compat_ioctl( return xfs_compat_attrlist_by_handle(mp, arg, inode); case XFS_IOC_ATTRMULTI_BY_HANDLE_32: return xfs_compat_attrmulti_by_handle(mp, arg, inode); + case XFS_IOC_FSSETDM_BY_HANDLE_32: + return xfs_compat_fssetdm_by_handle(mp, arg, inode); default: return -XFS_ERROR(ENOIOCTLCMD); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 785e7ec318c..af918749d18 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -155,6 +155,15 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq { #define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) +typedef struct compat_xfs_fsop_setdm_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle information */ + /* ptr to struct fsdmidata */ + compat_uptr_t data; /* DMAPI data */ +} compat_xfs_fsop_setdm_handlereq_t; + +#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ + _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v1.2.3 From e5d412f17846b0aea9e5250926f994ab2e4e1006 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:17 -0600 Subject: [XFS] Reorder xfs_ioctl32.c for some tidiness Put things in IMHO a more readable order, now that it's all done; add some comments. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl32.c | 78 +++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 42 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 9234360910f..b34b3d8892a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -16,11 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include -#include #include -#include -#include -#include #include #include "xfs.h" #include "xfs_fs.h" @@ -131,7 +127,7 @@ xfs_inumbers_fmt_compat( #else #define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#endif +#endif /* BROKEN_X86_ALIGNMENT */ STATIC int xfs_ioctl32_bstime_copyin( @@ -617,6 +613,7 @@ xfs_compat_ioctl( xfs_itrace_entry(XFS_I(inode)); switch (cmd) { + /* No size or alignment issues on any arch */ case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: case XFS_IOC_FSGETXATTR: @@ -629,35 +626,28 @@ xfs_compat_ioctl( case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_FSGROWFSRT: case XFS_IOC_FREEZE: case XFS_IOC_THAW: case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: - break; - - case XFS_IOC_GETXFLAGS_32: - case XFS_IOC_SETXFLAGS_32: - case XFS_IOC_GETVERSION_32: - cmd = _NATIVE_IOC(cmd, long); - break; - case XFS_IOC_SWAPEXT: { - struct xfs_swapext sxp; - struct compat_xfs_swapext __user *sxu = arg; - - /* Bulk copy in up to the sx_stat field, then grab bstat */ - if (copy_from_user(&sxp, sxu, - offsetof(xfs_swapext_t, sx_stat)) || - xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) - return -XFS_ERROR(EFAULT); - error = xfs_swapext(&sxp); - return -error; - } -#ifdef BROKEN_X86_ALIGNMENT - /* xfs_flock_t has wrong u32 vs u64 alignment */ + return xfs_ioctl(ip, filp, ioflags, cmd, arg); +#ifndef BROKEN_X86_ALIGNMENT + /* These are handled fine if no alignment issues */ + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + case XFS_IOC_FSGEOMETRY_V1: + case XFS_IOC_FSGROWFSDATA: + case XFS_IOC_FSGROWFSRT: + return xfs_ioctl(ip, filp, ioflags, cmd, arg); +#else case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: case XFS_IOC_ALLOCSP64_32: @@ -691,18 +681,25 @@ xfs_compat_ioctl( error = xfs_growfs_rt(mp, &in); return -error; } -#else /* These are handled fine if no alignment issues */ - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_FSGEOMETRY_V1: - break; #endif + /* long changes size, but xfs only copiese out 32 bits */ + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: + cmd = _NATIVE_IOC(cmd, long); + return xfs_ioctl(ip, filp, ioflags, cmd, arg); + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then copy bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(struct xfs_swapext, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: @@ -740,9 +737,6 @@ xfs_compat_ioctl( default: return -XFS_ERROR(ENOIOCTLCMD); } - - error = xfs_ioctl(ip, filp, ioflags, cmd, arg); - return error; } long -- cgit v1.2.3 From ddcd856d81861a523d79d077facd875da1f66792 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 07:55:34 -0500 Subject: [XFS] fix compile on 32 bit systems The recent compat patches make xfs_file.c include xfs_ioctl32.h unconditional, which breaks the build on 32 bit systems which don't have the various compat defintions. Remove the include and move the defintion of xfs_file_compat_ioctl to xfs_ioctl.h so that we can avoid including all the compat defintions in xfs_file.c Signed-off-by: Christoph Hellwig Tested-by: Kamalesh Babulal Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_file.c | 2 +- fs/xfs/linux-2.6/xfs_ioctl.h | 12 ++++++++++++ fs/xfs/linux-2.6/xfs_ioctl32.h | 3 --- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 72fc8d8c8bc..d377db05d80 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -36,9 +36,9 @@ #include "xfs_inode.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_ioctl32.h" #include "xfs_vnodeops.h" #include "xfs_da_btree.h" +#include "xfs_ioctl.h" #include #include diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index f67dc69381e..a3446aad070 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -67,4 +67,16 @@ xfs_attrmulti_attr_remove( char *name, __uint32_t flags); +extern long +xfs_file_compat_ioctl( + struct file *file, + unsigned int cmd, + unsigned long arg); + +extern long +xfs_file_compat_ioctl_invis( + struct file *file, + unsigned int cmd, + unsigned long arg); + #endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index af918749d18..1024c4f8ba0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -20,9 +20,6 @@ #include -extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); -extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); - /* * on 32-bit arches, ioctl argument structures may have different sizes * and/or alignment. We define compat structures which match the -- cgit v1.2.3 From 2234d54d3d855d6ffae88a24772a9389d6755e0c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:22 +0100 Subject: remove useless mnt_want_write call in xfs_write When mnt_want_write was introduced a call to it was added around xfs_ichgtime, but there is no need for this because a file can't be open read/write on a r/o mount, and a mount can't degrade r/o while we still have files open for writing. As the mnt_want_write changes were never merged into the CVS tree this patch is for mainline only. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_lrw.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 4959c874499..59b7d5f9e64 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -51,7 +51,6 @@ #include "xfs_vnodeops.h" #include -#include #include @@ -668,15 +667,8 @@ start: if (new_size > xip->i_size) xip->i_new_size = new_size; - /* - * We're not supposed to change timestamps in readonly-mounted - * filesystems. Throw it away if anyone asks us. - */ - if (likely(!(ioflags & IO_INVIS) && - !mnt_want_write(file->f_path.mnt))) { + if (likely(!(ioflags & IO_INVIS))) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - mnt_drop_write(file->f_path.mnt); - } /* * If the offset is beyond the size of the file, we have a couple -- cgit v1.2.3 From 73e6335c14209e508bec8ca7985d1fbde183bd1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:23 +0100 Subject: remove unused behvavior cruft in xfs_super.h Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_super.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 56dc48a76fa..d5d776d4cd6 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -20,24 +20,12 @@ #include -#ifdef CONFIG_XFS_DMAPI -# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) -# define vfs_initdmapi() dmapi_init() -# define vfs_exitdmapi() dmapi_uninit() -#else -# define vfs_insertdmapi(vfs) do { } while (0) -# define vfs_initdmapi() do { } while (0) -# define vfs_exitdmapi() do { } while (0) -#endif - #ifdef CONFIG_XFS_QUOTA -# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops) extern void xfs_qm_init(void); extern void xfs_qm_exit(void); # define vfs_initquota() xfs_qm_init() # define vfs_exitquota() xfs_qm_exit() #else -# define vfs_insertquota(vfs) do { } while (0) # define vfs_initquota() do { } while (0) # define vfs_exitquota() do { } while (0) #endif -- cgit v1.2.3 From ccd0be6cfc6943c4e0b3e3cdb598e0b7354a2d78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:24 +0100 Subject: remove unused prototypes for xfs_ihash_init / xfs_ihash_free Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_inode.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ae5800e7d39..c38d4509c66 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -496,8 +496,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * xfs_iget.c prototypes. */ -void xfs_ihash_init(struct xfs_mount *); -void xfs_ihash_free(struct xfs_mount *); xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, struct xfs_trans *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, -- cgit v1.2.3 From 5cafdeb2891a415a5dbf0ad80f0afedf8369e6bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:25 +0100 Subject: cleanup the inode reclaim path Merge xfs_iextract and xfs_idestroy into xfs_ireclaim as they are never called individually. Also rewrite most comments in this area as they were severly out of date. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_iget.c | 128 +++++++++++++++++++++++++++++++++++------------------ fs/xfs/xfs_inode.c | 72 ------------------------------ fs/xfs/xfs_inode.h | 2 - fs/xfs/xfs_mount.h | 1 - 4 files changed, 86 insertions(+), 117 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 27ec2417b85..f58e5e000fc 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -450,65 +450,109 @@ xfs_iput_new( IRELE(ip); } - /* - * This routine embodies the part of the reclaim code that pulls - * the inode from the inode hash table and the mount structure's - * inode list. - * This should only be called from xfs_reclaim(). + * This is called free all the memory associated with an inode. + * It must free the inode itself and any buffers allocated for + * if_extents/if_data and if_broot. It must also free the lock + * associated with the inode. + * + * Note: because we don't initialise everything on reallocation out + * of the zone, we must ensure we nullify everything correctly before + * freeing the structure. */ void -xfs_ireclaim(xfs_inode_t *ip) +xfs_ireclaim( + struct xfs_inode *ip) { - /* - * Remove from old hash list and mount list. - */ - XFS_STATS_INC(xs_ig_reclaims); + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; - xfs_iextract(ip); + XFS_STATS_INC(xs_ig_reclaims); /* - * Here we do a spurious inode lock in order to coordinate with inode - * cache radix tree lookups. This is because the lookup can reference - * the inodes in the cache without taking references. We make that OK - * here by ensuring that we wait until the inode is unlocked after the - * lookup before we go ahead and free it. We get both the ilock and - * the iolock because the code may need to drop the ilock one but will - * still hold the iolock. + * Remove the inode from the per-AG radix tree. It doesn't matter + * if it was never added to it because radix_tree_delete can deal + * with that case just fine. */ - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + pag = xfs_get_perag(mp, ip->i_ino); + write_lock(&pag->pag_ici_lock); + radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); /* - * Release dquots (and their references) if any. An inode may escape - * xfs_inactive and get here via vn_alloc->vn_reclaim path. + * Here we do an (almost) spurious inode lock in order to coordinate + * with inode cache radix tree lookups. This is because the lookup + * can reference the inodes in the cache without taking references. + * + * We make that OK here by ensuring that we wait until the inode is + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. */ - XFS_QM_DQDETACH(ip->i_mount, ip); - + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); /* - * Free all memory associated with the inode. + * Release dquots (and their references) if any. */ + XFS_QM_DQDETACH(ip->i_mount, ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_idestroy(ip); -} -/* - * This routine removes an about-to-be-destroyed inode from - * all of the lists in which it is located with the exception - * of the behavior chain. - */ -void -xfs_iextract( - xfs_inode_t *ip) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + xfs_idestroy_fork(ip, XFS_DATA_FORK); + break; + } - write_lock(&pag->pag_ici_lock); - radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + if (ip->i_afp) + xfs_idestroy_fork(ip, XFS_ATTR_FORK); - mp->m_ireclaims++; +#ifdef XFS_INODE_TRACE + ktrace_free(ip->i_trace); +#endif +#ifdef XFS_BMAP_TRACE + ktrace_free(ip->i_xtrace); +#endif +#ifdef XFS_BTREE_TRACE + ktrace_free(ip->i_btrace); +#endif +#ifdef XFS_RW_TRACE + ktrace_free(ip->i_rwtrace); +#endif +#ifdef XFS_ILOCK_TRACE + ktrace_free(ip->i_lock_trace); +#endif +#ifdef XFS_DIR2_TRACE + ktrace_free(ip->i_dir_trace); +#endif + if (ip->i_itemp) { + /* + * Only if we are shutting down the fs will we see an + * inode still in the AIL. If it is there, we should remove + * it to prevent a use-after-free from occurring. + */ + xfs_log_item_t *lip = &ip->i_itemp->ili_item; + struct xfs_ail *ailp = lip->li_ailp; + + ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || + XFS_FORCED_SHUTDOWN(ip->i_mount)); + if (lip->li_flags & XFS_LI_IN_AIL) { + spin_lock(&ailp->xa_lock); + if (lip->li_flags & XFS_LI_IN_AIL) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + xfs_inode_item_destroy(ip); + ip->i_itemp = NULL; + } + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(completion_done(&ip->i_flush)); + kmem_zone_free(xfs_inode_zone, ip); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 872191b4678..4e664f57860 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2449,78 +2449,6 @@ xfs_idestroy_fork( } } -/* - * This is called free all the memory associated with an inode. - * It must free the inode itself and any buffers allocated for - * if_extents/if_data and if_broot. It must also free the lock - * associated with the inode. - * - * Note: because we don't initialise everything on reallocation out - * of the zone, we must ensure we nullify everything correctly before - * freeing the structure. - */ -void -xfs_idestroy( - xfs_inode_t *ip) -{ - switch (ip->i_d.di_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - xfs_idestroy_fork(ip, XFS_DATA_FORK); - break; - } - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - -#ifdef XFS_INODE_TRACE - ktrace_free(ip->i_trace); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(ip->i_xtrace); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(ip->i_btrace); -#endif -#ifdef XFS_RW_TRACE - ktrace_free(ip->i_rwtrace); -#endif -#ifdef XFS_ILOCK_TRACE - ktrace_free(ip->i_lock_trace); -#endif -#ifdef XFS_DIR2_TRACE - ktrace_free(ip->i_dir_trace); -#endif - if (ip->i_itemp) { - /* - * Only if we are shutting down the fs will we see an - * inode still in the AIL. If it is there, we should remove - * it to prevent a use-after-free from occurring. - */ - xfs_log_item_t *lip = &ip->i_itemp->ili_item; - struct xfs_ail *ailp = lip->li_ailp; - - ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || - XFS_FORCED_SHUTDOWN(ip->i_mount)); - if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&ailp->xa_lock); - if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_ail_delete(ailp, lip); - else - spin_unlock(&ailp->xa_lock); - } - xfs_inode_item_destroy(ip); - ip->i_itemp = NULL; - } - /* asserts to verify all state is correct here */ - ASSERT(atomic_read(&ip->i_iocount) == 0); - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); -} - - /* * Increment the pin count of the given buffer. * This value is protected by ipinlock spinlock in the mount structure. diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c38d4509c66..03ae96bdae7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -529,8 +529,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); -void xfs_idestroy(xfs_inode_t *); -void xfs_iextract(xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4fce22a8c35..38252554b1c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -241,7 +241,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - uint m_ireclaims; /* count of calls to reclaim*/ uint m_readio_log; /* min read size log bytes */ uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ -- cgit v1.2.3 From 5d765b976c3a41faf9a73718fb8cc5833990a8ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:26 +0100 Subject: kill xfs_buf_iostart xfs_buf_iostart is a "shared" helper for xfs_buf_read_flags, xfs_bawrite, and xfs_bdwrite - except that there isn't much shared code but rather special cases for each caller. So remove this function and move the functionality to the caller. xfs_bawrite and xfs_bdwrite are now big enough to be moved out of line and the xfs_buf_read_flags is moved into a new helper called _xfs_buf_read. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_buf.c | 84 ++++++++++++++++++++++++++-------------------- fs/xfs/linux-2.6/xfs_buf.h | 22 ++---------- 2 files changed, 50 insertions(+), 56 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 36d5fcd3f59..cd89c56715a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -630,6 +630,29 @@ xfs_buf_get_flags( return NULL; } +STATIC int +_xfs_buf_read( + xfs_buf_t *bp, + xfs_buf_flags_t flags) +{ + int status; + + XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags); + + ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + + status = xfs_buf_iorequest(bp); + if (!status && !(flags & XBF_ASYNC)) + status = xfs_buf_iowait(bp); + return status; +} + xfs_buf_t * xfs_buf_read_flags( xfs_buftarg_t *target, @@ -646,7 +669,7 @@ xfs_buf_read_flags( if (!XFS_BUF_ISDONE(bp)) { XB_TRACE(bp, "read", (unsigned long)flags); XFS_STATS_INC(xb_get_read); - xfs_buf_iostart(bp, flags); + _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { XB_TRACE(bp, "read_async", (unsigned long)flags); /* @@ -1048,50 +1071,39 @@ xfs_buf_ioerror( XB_TRACE(bp, "ioerror", (unsigned long)error); } -/* - * Initiate I/O on a buffer, based on the flags supplied. - * The b_iodone routine in the buffer supplied will only be called - * when all of the subsidiary I/O requests, if any, have been completed. - */ int -xfs_buf_iostart( - xfs_buf_t *bp, - xfs_buf_flags_t flags) +xfs_bawrite( + void *mp, + struct xfs_buf *bp) { - int status = 0; + XB_TRACE(bp, "bawrite", 0); - XB_TRACE(bp, "iostart", (unsigned long)flags); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - if (flags & XBF_DELWRI) { - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); - bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); - xfs_buf_delwri_queue(bp, 1); - return 0; - } + xfs_buf_delwri_dequeue(bp); - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); - bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); + bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); + + bp->b_fspriv3 = mp; + bp->b_strat = xfs_bdstrat_cb; + return xfs_bdstrat_cb(bp); +} - BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL); +void +xfs_bdwrite( + void *mp, + struct xfs_buf *bp) +{ + XB_TRACE(bp, "bdwrite", 0); - /* For writes allow an alternate strategy routine to precede - * the actual I/O request (which may not be issued at all in - * a shutdown situation, for example). - */ - status = (flags & XBF_WRITE) ? - xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp); + bp->b_strat = xfs_bdstrat_cb; + bp->b_fspriv3 = mp; - /* Wait for I/O if we are not an async request. - * Note: async I/O request completion will release the buffer, - * and that can already be done by this point. So using the - * buffer pointer from here on, after async I/O, is invalid. - */ - if (!status && !(flags & XBF_ASYNC)) - status = xfs_buf_iowait(bp); + bp->b_flags &= ~XBF_READ; + bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); - return status; + xfs_buf_delwri_queue(bp, 1); } STATIC_INLINE void diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 456519a088c..0e2aa16f5e4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ +extern int xfs_bawrite(void *mp, xfs_buf_t *bp); +extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); -extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, @@ -366,14 +367,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_TARGET(bp) ((bp)->b_target) #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) -static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) -{ - bp->b_fspriv3 = mp; - bp->b_strat = xfs_bdstrat_cb; - xfs_buf_delwri_dequeue(bp); - return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); -} - static inline void xfs_buf_relse(xfs_buf_t *bp) { if (!bp->b_relse) @@ -414,17 +407,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp) return error; } -/* - * No error can be returned from xfs_buf_iostart for delwri - * buffers as they are queued and no I/O is issued. - */ -static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) -{ - bp->b_strat = xfs_bdstrat_cb; - bp->b_fspriv3 = mp; - (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); -} - #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) #define xfs_iowait(bp) xfs_buf_iowait(bp) -- cgit v1.2.3 From d9424b3c4a1e96f87c6cfd4d8dd2f8d9bbb4dcc5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:27 +0100 Subject: stop using igrab in xfs_vn_link ->link is guranteed to get an already reference inode passed so we can do a simple increment of i_count instead of using igrab and thus avoid banging on the global inode_lock. This is what most filesystems already do. Also move the increment after the call to xfs_link to simplify error handling. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_iops.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2903faf6a26..76b570dd1ab 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -367,21 +367,18 @@ xfs_vn_link( struct inode *dir, struct dentry *dentry) { - struct inode *inode; /* inode of guy being linked to */ + struct inode *inode = old_dentry->d_inode; struct xfs_name name; int error; - inode = old_dentry->d_inode; xfs_dentry_to_name(&name, dentry); - igrab(inode); error = xfs_link(XFS_I(dir), XFS_I(inode), &name); - if (unlikely(error)) { - iput(inode); + if (unlikely(error)) return -error; - } xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } -- cgit v1.2.3 From 39e2defe73106ca2e1c85e5286038a0a13f49513 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:28 +0100 Subject: reduce l_icloglock roundtrips All but one caller of xlog_state_want_sync drop and re-acquire l_icloglock around the call to it, just so that xlog_state_want_sync can acquire and drop it. Move all lock operation out of l_icloglock and assert that the lock is held when it is called. Note that it would make sense to extende this scheme to xlog_state_release_iclog, but the locking in there is more complicated and we'd like to keep the atomic_dec_and_lock optmization for those callers not having l_icloglock yet. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_log.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index aadaa1472f6..f4726f702a9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -729,8 +729,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_lock(&log->l_icloglock); iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); - spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); error = xlog_state_release_iclog(log, iclog); spin_lock(&log->l_icloglock); @@ -767,9 +767,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_lock(&log->l_icloglock); iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); - spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); error = xlog_state_release_iclog(log, iclog); spin_lock(&log->l_icloglock); @@ -1984,7 +1984,9 @@ xlog_write(xfs_mount_t * mp, if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); record_cnt = data_cnt = 0; + spin_lock(&log->l_icloglock); xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); if (commit_iclog) { ASSERT(flags & XLOG_COMMIT_TRANS); *commit_iclog = iclog; @@ -3193,7 +3195,7 @@ try_again: STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - spin_lock(&log->l_icloglock); + ASSERT(spin_is_locked(&log->l_icloglock)); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); @@ -3201,10 +3203,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) ASSERT(iclog->ic_state & (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); } - - spin_unlock(&log->l_icloglock); -} /* xlog_state_want_sync */ - +} /***************************************************************************** -- cgit v1.2.3 From 63ad2a5c4cf37e3242142eee8a8dcd4a8515302e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:29 +0100 Subject: remove dead code from sv_t implementation Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/sv.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h index 351a8f454bd..4dfc7c37081 100644 --- a/fs/xfs/linux-2.6/sv.h +++ b/fs/xfs/linux-2.6/sv.h @@ -32,23 +32,15 @@ typedef struct sv_s { wait_queue_head_t waiters; } sv_t; -#define SV_FIFO 0x0 /* sv_t is FIFO type */ -#define SV_LIFO 0x2 /* sv_t is LIFO type */ -#define SV_PRIO 0x4 /* sv_t is PRIO type */ -#define SV_KEYED 0x6 /* sv_t is KEYED type */ -#define SV_DEFAULT SV_FIFO - - -static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, - unsigned long timeout) +static inline void _sv_wait(sv_t *sv, spinlock_t *lock) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&sv->waiters, &wait); - __set_current_state(state); + __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(lock); - schedule_timeout(timeout); + schedule(); remove_wait_queue(&sv->waiters, &wait); } @@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, #define sv_destroy(sv) \ /*NOTHING*/ #define sv_wait(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_wait_sig(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) -#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) + _sv_wait(sv, lock) #define sv_signal(sv) \ wake_up(&(sv)->waiters) #define sv_broadcast(sv) \ -- cgit v1.2.3 From df6771bde14551eceeacf331666a92735e0773ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:30 +0100 Subject: kill dead quota flags Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_quota.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 12c4ec775af..48965ecaa15 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -84,11 +84,9 @@ typedef struct xfs_dqblk { #define XFS_DQ_USER 0x0001 /* a user quota */ #define XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ -#define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ -#define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ -#define XFS_DQ_WANT 0x0020 /* for lookup/reclaim race */ -#define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */ -#define XFS_DQ_MARKER 0x0080 /* sentinel */ +#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ +#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */ +#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */ #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) -- cgit v1.2.3 From 5efcbb853bc2f051d720a191268f8dd901fea9c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:31 +0100 Subject: cleanup xfs_sb.h feature flag helpers The various inlines in xfs_sb.h that deal with the superblock version and fature flags were converted from macros a while ago, and this show by the odd coding style full of useless braces and backslashes and the avoidance of conditionals. Clean these up to look like normal C code. Signed-off-by: Christoph Hellwig Reviewed-by: Donald Douwsma Signed-off-by: Niv Sardi --- fs/xfs/xfs_sb.h | 166 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 86 insertions(+), 80 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 3f8cf1587f4..e123c1499b4 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -296,30 +296,34 @@ typedef enum { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -#ifdef __KERNEL__ static inline int xfs_sb_good_version(xfs_sb_t *sbp) { - return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ - (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ - (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))); -} + /* We always support version 1-3 */ + if (sbp->sb_versionnum >= XFS_SB_VERSION_1 && + sbp->sb_versionnum <= XFS_SB_VERSION_3) + return 1; + + /* We support version 4 if all feature bits are supported */ + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) { + if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || + ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && + (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) + return 0; + +#ifdef __KERNEL__ + if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) + return 0; #else -static inline int xfs_sb_good_version(xfs_sb_t *sbp) -{ - return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ - (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ - (!(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \ - (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)))); + if ((sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) && + sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) + return 0; +#endif + + return 1; + } + + return 0; } -#endif /* __KERNEL__ */ /* * Detect a mismatched features2 field. Older kernels read/wrote @@ -332,123 +336,127 @@ static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp) static inline unsigned xfs_sb_version_tonew(unsigned v) { - return ((((v) == XFS_SB_VERSION_1) ? \ - 0 : \ - (((v) == XFS_SB_VERSION_2) ? \ - XFS_SB_VERSION_ATTRBIT : \ - (XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \ - XFS_SB_VERSION_4); + if (v == XFS_SB_VERSION_1) + return XFS_SB_VERSION_4; + + if (v == XFS_SB_VERSION_2) + return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; + + return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT | + XFS_SB_VERSION_NLINKBIT; } static inline unsigned xfs_sb_version_toold(unsigned v) { - return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \ - 0 : \ - (((v) & XFS_SB_VERSION_NLINKBIT) ? \ - XFS_SB_VERSION_3 : \ - (((v) & XFS_SB_VERSION_ATTRBIT) ? \ - XFS_SB_VERSION_2 : \ - XFS_SB_VERSION_1))); + if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) + return 0; + if (v & XFS_SB_VERSION_NLINKBIT) + return XFS_SB_VERSION_3; + if (v & XFS_SB_VERSION_ATTRBIT) + return XFS_SB_VERSION_2; + return XFS_SB_VERSION_1; } static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) { - return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \ - ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); + return sbp->sb_versionnum == XFS_SB_VERSION_2 || + sbp->sb_versionnum == XFS_SB_VERSION_3 || + (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); } static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) { - (sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \ - XFS_SB_VERSION_2 : \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \ - ((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \ - (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))); + if (sbp->sb_versionnum == XFS_SB_VERSION_1) + sbp->sb_versionnum = XFS_SB_VERSION_2; + else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) + sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; + else + sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; } static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) { - return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); + return sbp->sb_versionnum == XFS_SB_VERSION_3 || + (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); } static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) { - (sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \ - XFS_SB_VERSION_3 : \ - ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)); + if (sbp->sb_versionnum <= XFS_SB_VERSION_2) + sbp->sb_versionnum = XFS_SB_VERSION_3; + else + sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT; } static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); } static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) { - (sbp)->sb_versionnum = \ - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \ - ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \ - (xfs_sb_version_tonew((sbp)->sb_versionnum) | \ - XFS_SB_VERSION_QUOTABIT)); + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) + sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; + else + sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | + XFS_SB_VERSION_QUOTABIT; } static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); } static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); } static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); } static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); } static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); } static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); } static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); } static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); } static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); } /* @@ -463,22 +471,20 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) { - return (xfs_sb_version_hasmorebits(sbp) && \ - ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); + return xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT); } static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) { - return (xfs_sb_version_hasmorebits(sbp)) && \ - ((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); + return xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); } static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) { - ((sbp)->sb_versionnum = \ - ((sbp)->sb_versionnum | XFS_SB_VERSION_MOREBITSBIT), \ - ((sbp)->sb_features2 = \ - ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT))); + sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; + sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; } static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) -- cgit v1.2.3 From 6bd16ff27060819d16b3e7abe59b6644b349aea3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:32 +0100 Subject: kill dead inode flags There are a few inode flags around that aren't used anywhere, so remove them. Also update xfsidbg to display all used inode flags correctly. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_aops.c | 11 +++-------- fs/xfs/linux-2.6/xfs_file.c | 10 ++-------- fs/xfs/linux-2.6/xfs_iops.c | 5 ----- fs/xfs/linux-2.6/xfs_super.c | 1 - fs/xfs/xfs_iget.c | 1 - fs/xfs/xfs_inode.h | 17 ++++++----------- 6 files changed, 11 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8fbc97df360..bb224d07e1e 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -317,14 +317,9 @@ xfs_map_blocks( xfs_iomap_t *mapp, int flags) { - xfs_inode_t *ip = XFS_I(inode); - int error, nmaps = 1; - - error = xfs_iomap(ip, offset, count, - flags, mapp, &nmaps); - if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) - xfs_iflags_set(ip, XFS_IMODIFIED); - return -error; + int nmaps = 1; + + return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); } STATIC_INLINE int diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index d377db05d80..f999d20a429 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -281,11 +281,8 @@ xfs_file_ioctl( unsigned int cmd, unsigned long p) { - int error; struct inode *inode = filp->f_path.dentry->d_inode; - error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); /* NOTE: some of the ioctl's return positive #'s as a * byte count indicating success, such as @@ -293,7 +290,7 @@ xfs_file_ioctl( * like most other routines. This means true * errors need to be returned as a negative value. */ - return error; + return xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); } STATIC long @@ -302,11 +299,8 @@ xfs_file_ioctl_invis( unsigned int cmd, unsigned long p) { - int error; struct inode *inode = filp->f_path.dentry->d_inode; - error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); /* NOTE: some of the ioctl's return positive #'s as a * byte count indicating success, such as @@ -314,7 +308,7 @@ xfs_file_ioctl_invis( * like most other routines. This means true * errors need to be returned as a negative value. */ - return error; + return xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); } /* diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 76b570dd1ab..7aa53fefc67 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -159,8 +159,6 @@ xfs_init_security( } error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); - if (!error) - xfs_iflags_set(ip, XFS_IMODIFIED); kfree(name); kfree(value); @@ -261,7 +259,6 @@ xfs_vn_mknod( error = _ACL_INHERIT(inode, mode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - xfs_iflags_set(ip, XFS_IMODIFIED); _ACL_FREE(default_acl); } @@ -377,7 +374,6 @@ xfs_vn_link( if (unlikely(error)) return -error; - xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; @@ -888,7 +884,6 @@ xfs_setup_inode( inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); switch (inode->i_mode & S_IFMT) { case S_IFREG: diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 5389f077874..37f2d11be4a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1025,7 +1025,6 @@ xfs_fs_clear_inode( XFS_STATS_DEC(vn_active); xfs_inactive(ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); } STATIC void diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index f58e5e000fc..82eb8ed5633 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -362,7 +362,6 @@ again: } xfs_put_perag(mp, pag); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; ASSERT(ip->i_df.if_ext_max == diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 03ae96bdae7..ddac1b806c0 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -403,17 +403,12 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * In-core inode flags. */ -#define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */ -#define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ -#define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ -#define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ -#define XFS_ISTALE 0x0010 /* inode has been staled */ -#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ -#define XFS_INEW 0x0040 -#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */ -#define XFS_IMODIFIED 0x0100 /* XFS inode state possibly differs */ - /* to the Linux inode state. */ -#define XFS_ITRUNCATED 0x0200 /* truncated down so flush-on-close */ +#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ +#define XFS_ISTALE 0x0002 /* inode has been staled */ +#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ +#define XFS_INEW 0x0008 /* inode has just been allocated */ +#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ +#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ /* * Flags for inode locking. -- cgit v1.2.3 From e88f11abe09d14718b82a991db118c5e485aa897 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:33 +0100 Subject: remove unused m_inode_quiesce member from struct xfs_mount Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_mount.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 38252554b1c..cfdac80f4fc 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -302,7 +302,6 @@ typedef struct xfs_mount { int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ - __uint8_t m_inode_quiesce;/* call quiesce on new inodes. */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ -- cgit v1.2.3 From b56757becf8bc62292263a24a23cf55edb4be55f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:34 +0100 Subject: remove leftovers of shared read-only support We never supported shared read-only filesystems, so remove the dead code left over from IRIX for it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_super.c | 29 ----------------------------- fs/xfs/xfs_mount.c | 26 -------------------------- fs/xfs/xfs_mount.h | 2 -- 3 files changed, 57 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 37f2d11be4a..c4f788e1119 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1388,35 +1388,6 @@ xfs_finish_flags( return XFS_ERROR(EROFS); } -#if 0 /* shared mounts were never supported on Linux */ - /* - * check for shared mount. - */ - if (ap->flags & XFSMNT_SHARED) { - if (!xfs_sb_version_hasshared(&mp->m_sb)) - return XFS_ERROR(EINVAL); - - /* - * For IRIX 6.5, shared mounts must have the shared - * version bit set, have the persistent readonly - * field set, must be version 0 and can only be mounted - * read-only. - */ - if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) || - (mp->m_sb.sb_shared_vn != 0)) - return XFS_ERROR(EINVAL); - - mp->m_flags |= XFS_MOUNT_SHARED; - - /* - * Shared XFS V0 can't deal with DMI. Return EINVAL. - */ - if (mp->m_sb.sb_shared_vn == 0 && - (mp->m_flags & XFS_MOUNT_DMAPI)) - return XFS_ERROR(EINVAL); - } -#endif - return 0; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3f999b1c038..1672ff56a00 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1352,24 +1352,6 @@ xfs_log_sbcount( return error; } -STATIC void -xfs_mark_shared_ro( - xfs_mount_t *mp, - xfs_buf_t *bp) -{ - xfs_dsb_t *sb = XFS_BUF_TO_SBP(bp); - __uint16_t version; - - if (!(sb->sb_flags & XFS_SBF_READONLY)) - sb->sb_flags |= XFS_SBF_READONLY; - - version = be16_to_cpu(sb->sb_versionnum); - if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 || - !(version & XFS_SB_VERSION_SHAREDBIT)) - version |= XFS_SB_VERSION_SHAREDBIT; - sb->sb_versionnum = cpu_to_be16(version); -} - int xfs_unmountfs_writesb(xfs_mount_t *mp) { @@ -1385,12 +1367,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) sbp = xfs_getsb(mp, 0); - /* - * mark shared-readonly if desired - */ - if (mp->m_mk_sharedro) - xfs_mark_shared_ro(mp, sbp); - XFS_BUF_UNDONE(sbp); XFS_BUF_UNREAD(sbp); XFS_BUF_UNDELAYWRITE(sbp); @@ -1402,8 +1378,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) if (error) xfs_ioerror_alert("xfs_unmountfs_writesb", mp, sbp, XFS_BUF_ADDR(sbp)); - if (error && mp->m_mk_sharedro) - xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); xfs_buf_relse(sbp); } return error; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index cfdac80f4fc..2ab2df57b69 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -301,7 +301,6 @@ typedef struct xfs_mount { int m_sinoalign; /* stripe unit inode alignment */ int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ - __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ @@ -349,7 +348,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ #define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ #define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ -#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */ #define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ #define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */ /* osyncisdsync is now default*/ -- cgit v1.2.3 From 070c4616ec62fc207e2aeef9d0f28af294c651d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:35 +0100 Subject: use xfs_trans_ijoin in xfs_trans_iget Use xfs_trans_ijoin in xfs_trans_iget in case we need to join an inode into a transaction instead of opencoding it. Based on a discussion with and an incomplete patch from Niv Sardi. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/xfs_trans_inode.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 2a1c0f071f9..23d276af2e0 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -85,7 +85,6 @@ xfs_trans_iget( { int error; xfs_inode_t *ip; - xfs_inode_log_item_t *iip; /* * If the transaction pointer is NULL, just call the normal @@ -138,34 +137,7 @@ xfs_trans_iget( } ASSERT(ip != NULL); - /* - * Get a log_item_desc to point at the new item. - */ - if (ip->i_itemp == NULL) - xfs_inode_item_init(ip, mp); - iip = ip->i_itemp; - (void) xfs_trans_add_item(tp, (xfs_log_item_t *)(iip)); - - xfs_trans_inode_broot_debug(ip); - - /* - * If the IO lock has been acquired, mark that in - * the inode log item so we'll know to unlock it - * when the transaction commits. - */ - ASSERT(iip->ili_flags == 0); - if (lock_flags & XFS_IOLOCK_EXCL) { - iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL; - } else if (lock_flags & XFS_IOLOCK_SHARED) { - iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED; - } - - /* - * Initialize i_transp so we can find it with xfs_inode_incore() - * above. - */ - ip->i_transp = tp; - + xfs_trans_ijoin(tp, ip, lock_flags); *ipp = ip; return 0; } -- cgit v1.2.3 From e57481dc269cd3773b22f53bfb869308780a7bf1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:36 +0100 Subject: no explicit xfs_iflush for special inodes during unmount Currently we explicitly call xfs_iflush on the quota, real-time and root inodes from xfs_unmount_flush. But we just called xfs_sync_inodes with SYNC_ATTR and do an XFS_bflush aka xfs_flush_buftarg to make sure all inodes are on disk already, so there is no need for these special cases. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_vnode.h | 5 ---- fs/xfs/quota/xfs_qm.c | 44 ++++++---------------------- fs/xfs/quota/xfs_qm.h | 2 +- fs/xfs/xfs_mount.h | 2 +- fs/xfs/xfs_vfsops.c | 68 ++++---------------------------------------- 5 files changed, 15 insertions(+), 106 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index fb49e0f42d3..314ca18bc60 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -65,11 +65,6 @@ extern void vn_iowait(struct xfs_inode *ip); extern void vn_iowake(struct xfs_inode *ip); extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); -static inline int vn_count(struct inode *vp) -{ - return atomic_read(&vp->i_count); -} - #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5b198d15e76..6b13960cf31 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -395,13 +395,10 @@ xfs_qm_mount_quotas( /* * Called from the vfsops layer. */ -int +void xfs_qm_unmount_quotas( xfs_mount_t *mp) { - xfs_inode_t *uqp, *gqp; - int error = 0; - /* * Release the dquots that root inode, et al might be holding, * before we flush quotas and blow away the quotainfo structure. @@ -414,43 +411,18 @@ xfs_qm_unmount_quotas( xfs_qm_dqdetach(mp->m_rsumip); /* - * Flush out the quota inodes. + * Release the quota inodes. */ - uqp = gqp = NULL; if (mp->m_quotainfo) { - if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) { - xfs_ilock(uqp, XFS_ILOCK_EXCL); - xfs_iflock(uqp); - error = xfs_iflush(uqp, XFS_IFLUSH_SYNC); - xfs_iunlock(uqp, XFS_ILOCK_EXCL); - if (unlikely(error == EFSCORRUPTED)) { - XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)", - XFS_ERRLEVEL_LOW, mp); - goto out; - } + if (mp->m_quotainfo->qi_uquotaip) { + IRELE(mp->m_quotainfo->qi_uquotaip); + mp->m_quotainfo->qi_uquotaip = NULL; } - if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) { - xfs_ilock(gqp, XFS_ILOCK_EXCL); - xfs_iflock(gqp); - error = xfs_iflush(gqp, XFS_IFLUSH_SYNC); - xfs_iunlock(gqp, XFS_ILOCK_EXCL); - if (unlikely(error == EFSCORRUPTED)) { - XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)", - XFS_ERRLEVEL_LOW, mp); - goto out; - } + if (mp->m_quotainfo->qi_gquotaip) { + IRELE(mp->m_quotainfo->qi_gquotaip); + mp->m_quotainfo->qi_gquotaip = NULL; } } - if (uqp) { - IRELE(uqp); - mp->m_quotainfo->qi_uquotaip = NULL; - } - if (gqp) { - IRELE(gqp); - mp->m_quotainfo->qi_gquotaip = NULL; - } -out: - return XFS_ERROR(error); } /* diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 4f2de977172..ddf09166387 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -167,7 +167,7 @@ extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); -extern int xfs_qm_unmount_quotas(xfs_mount_t *); +extern void xfs_qm_unmount_quotas(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); extern int xfs_qm_sync(xfs_mount_t *, int); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2ab2df57b69..4f64fd160cc 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -117,7 +117,7 @@ struct xfs_quotainfo; typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); -typedef int (*xfs_qmunmount_t)(struct xfs_mount *); +typedef void (*xfs_qmunmount_t)(struct xfs_mount *); typedef void (*xfs_qmdone_t)(struct xfs_mount *); typedef void (*xfs_dqrele_t)(struct xfs_dquot *); typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index cad3da36fb2..559fb8d5108 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -68,74 +68,16 @@ xfs_unmount_flush( rid of. */ int relocation) /* Called from vfs relocation. */ { - xfs_inode_t *rip = mp->m_rootip; - xfs_inode_t *rbmip; - xfs_inode_t *rsumip = NULL; - int error; - - xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - xfs_iflock(rip); - - /* - * Flush out the real time inodes. - */ - if ((rbmip = mp->m_rbmip) != NULL) { - xfs_ilock(rbmip, XFS_ILOCK_EXCL); - xfs_iflock(rbmip); - error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC); - xfs_iunlock(rbmip, XFS_ILOCK_EXCL); - - if (error == EFSCORRUPTED) - goto fscorrupt_out; - - ASSERT(vn_count(VFS_I(rbmip)) == 1); - - rsumip = mp->m_rsumip; - xfs_ilock(rsumip, XFS_ILOCK_EXCL); - xfs_iflock(rsumip); - error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC); - xfs_iunlock(rsumip, XFS_ILOCK_EXCL); - - if (error == EFSCORRUPTED) - goto fscorrupt_out; - - ASSERT(vn_count(VFS_I(rsumip)) == 1); - } - - /* - * Synchronously flush root inode to disk - */ - error = xfs_iflush(rip, XFS_IFLUSH_SYNC); - if (error == EFSCORRUPTED) - goto fscorrupt_out2; - - if (vn_count(VFS_I(rip)) != 1 && !relocation) { - xfs_iunlock(rip, XFS_ILOCK_EXCL); - return XFS_ERROR(EBUSY); - } - /* * Release dquot that rootinode, rbmino and rsumino might be holding, * flush and purge the quota inodes. */ - error = XFS_QM_UNMOUNT(mp); - if (error == EFSCORRUPTED) - goto fscorrupt_out2; + XFS_QM_UNMOUNT(mp); - if (rbmip) { - IRELE(rbmip); - IRELE(rsumip); - } + if (mp->m_rbmip) + IRELE(mp->m_rbmip); + if (mp->m_rsumip) + IRELE(mp->m_rsumip); - xfs_iunlock(rip, XFS_ILOCK_EXCL); return 0; - -fscorrupt_out: - xfs_ifunlock(rip); - -fscorrupt_out2: - xfs_iunlock(rip, XFS_ILOCK_EXCL); - - return XFS_ERROR(EFSCORRUPTED); } - -- cgit v1.2.3 From f95099ba5ae06b96a9c17ef93cc655f686d79077 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:37 +0100 Subject: kill xfs_unmount_flush There's almost nothing left in this function, instead remove the IRELE on the real times inodes and the call to XFS_QM_UNMOUNT into xfs_unmountfs. For the regular unmount case that means it now also happenes after dmapi notification, but otherwise there is no difference in behaviour. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/Makefile | 1 - fs/xfs/linux-2.6/xfs_super.c | 5 --- fs/xfs/xfs_mount.c | 10 ++++++ fs/xfs/xfs_mount.h | 1 - fs/xfs/xfs_vfsops.c | 83 -------------------------------------------- 5 files changed, 10 insertions(+), 90 deletions(-) delete mode 100644 fs/xfs/xfs_vfsops.c (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 51b87de97f8..a5a9ef0ce84 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -85,7 +85,6 @@ xfs-y += xfs_alloc.o \ xfs_trans_inode.o \ xfs_trans_item.o \ xfs_utils.o \ - xfs_vfsops.o \ xfs_vnodeops.o \ xfs_rw.o \ xfs_dmops.o \ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c4f788e1119..4ebbd6820e7 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1043,7 +1043,6 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); struct xfs_inode *rip = mp->m_rootip; int unmount_event_flags = 0; - int error; xfs_syncd_stop(mp); xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); @@ -1071,8 +1070,6 @@ xfs_fs_put_super( xfs_filestream_unmount(mp); XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - WARN_ON(error); if (mp->m_flags & XFS_MOUNT_DMAPI) { XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, @@ -1535,8 +1532,6 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - WARN_ON(error); xfs_unmountfs(mp); goto out_free_sb; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1672ff56a00..3c97c6463a4 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1220,6 +1220,16 @@ xfs_unmountfs( __uint64_t resblks; int error; + /* + * Release dquot that rootinode, rbmino and rsumino might be holding, + * and release the quota inodes. + */ + XFS_QM_UNMOUNT(mp); + + if (mp->m_rbmip) + IRELE(mp->m_rbmip); + if (mp->m_rsumip) + IRELE(mp->m_rsumip); IRELE(mp->m_rootip); /* diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4f64fd160cc..ae5da88ace2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -509,7 +509,6 @@ extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_unmountfs_writesb(xfs_mount_t *); -extern int xfs_unmount_flush(xfs_mount_t *, int); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, int64_t, int); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c deleted file mode 100644 index 559fb8d5108..00000000000 --- a/fs/xfs/xfs_vfsops.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" -#include "xfs_quota.h" -#include "xfs_error.h" -#include "xfs_bmap.h" -#include "xfs_rw.h" -#include "xfs_buf_item.h" -#include "xfs_log_priv.h" -#include "xfs_dir2_trace.h" -#include "xfs_extfree_item.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_mru_cache.h" -#include "xfs_filestream.h" -#include "xfs_fsops.h" -#include "xfs_vnodeops.h" -#include "xfs_utils.h" -#include "xfs_sync.h" - - -/* - * xfs_unmount_flush implements a set of flush operation on special - * inodes, which are needed as a separate set of operations so that - * they can be called as part of relocation process. - */ -int -xfs_unmount_flush( - xfs_mount_t *mp, /* Mount structure we are getting - rid of. */ - int relocation) /* Called from vfs relocation. */ -{ - /* - * Release dquot that rootinode, rbmino and rsumino might be holding, - * flush and purge the quota inodes. - */ - XFS_QM_UNMOUNT(mp); - - if (mp->m_rbmip) - IRELE(mp->m_rbmip); - if (mp->m_rsumip) - IRELE(mp->m_rsumip); - - return 0; -} -- cgit v1.2.3 From 583fa586f0e4a8222dd091ce971b85c1364f3d92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:38 +0100 Subject: kill vn_ioerror There's just one caller of this helper, and it's much cleaner to just merge the xfs_do_force_shutdown call into it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_aops.c | 17 +++++++++++++---- fs/xfs/linux-2.6/xfs_vnode.c | 16 ---------------- fs/xfs/linux-2.6/xfs_vnode.h | 1 - 3 files changed, 13 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index bb224d07e1e..f35dba9bf1d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -146,16 +146,25 @@ xfs_destroy_ioend( xfs_ioend_t *ioend) { struct buffer_head *bh, *next; + struct xfs_inode *ip = XFS_I(ioend->io_inode); for (bh = ioend->io_buffer_head; bh; bh = next) { next = bh->b_private; bh->b_end_io(bh, !ioend->io_error); } - if (unlikely(ioend->io_error)) { - vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error, - __FILE__,__LINE__); + + /* + * Volume managers supporting multiple paths can send back ENODEV + * when the final path disappears. In this case continuing to fill + * the page cache with dirty data which cannot be written out is + * evil, so prevent that. + */ + if (unlikely(ioend->io_error == -ENODEV)) { + xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, + __FILE__, __LINE__); } - vn_iowake(XFS_I(ioend->io_inode)); + + vn_iowake(ip); mempool_free(ioend, xfs_ioend_pool); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index ad18262d651..a8cf97a4319 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -66,22 +66,6 @@ vn_iowake( wake_up(vptosync(ip)); } -/* - * Volume managers supporting multiple paths can send back ENODEV when the - * final path disappears. In this case continuing to fill the page cache - * with dirty data which cannot be written out is evil, so prevent that. - */ -void -vn_ioerror( - xfs_inode_t *ip, - int error, - char *f, - int l) -{ - if (unlikely(error == -ENODEV)) - xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); -} - #ifdef XFS_INODE_TRACE #define KTRACE_ENTER(ip, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 314ca18bc60..07fed8837db 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -63,7 +63,6 @@ extern void vn_init(void); */ extern void vn_iowait(struct xfs_inode *ip); extern void vn_iowake(struct xfs_inode *ip); -extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); #define IHOLD(ip) \ do { \ -- cgit v1.2.3 From 25e41b3d521f52771354a718042a753a3e77df0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:39 +0100 Subject: move vn_iowait / vn_iowake into xfs_aops.c The whole machinery to wait on I/O completion is related to the I/O path and should be there instead of in xfs_vnode.c. Also give the functions more descriptive names. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/linux-2.6/xfs_aops.c | 38 ++++++++++++++++++++++++++++++++++++-- fs/xfs/linux-2.6/xfs_aops.h | 3 +++ fs/xfs/linux-2.6/xfs_super.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 2 +- fs/xfs/linux-2.6/xfs_vnode.c | 34 ---------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 10 ---------- fs/xfs/xfs_inode.c | 6 +++--- fs/xfs/xfs_vnodeops.c | 7 ++++--- 8 files changed, 48 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f35dba9bf1d..de3a198f771 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -42,6 +42,40 @@ #include #include + +/* + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) +static wait_queue_head_t xfs_ioend_wq[NVSYNC]; + +void __init +xfs_ioend_init(void) +{ + int i; + + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&xfs_ioend_wq[i]); +} + +void +xfs_ioend_wait( + xfs_inode_t *ip) +{ + wait_queue_head_t *wq = to_ioend_wq(ip); + + wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); +} + +STATIC void +xfs_ioend_wake( + xfs_inode_t *ip) +{ + if (atomic_dec_and_test(&ip->i_iocount)) + wake_up(to_ioend_wq(ip)); +} + STATIC void xfs_count_page_state( struct page *page, @@ -164,7 +198,7 @@ xfs_destroy_ioend( __FILE__, __LINE__); } - vn_iowake(ip); + xfs_ioend_wake(ip); mempool_free(ioend, xfs_ioend_pool); } @@ -516,7 +550,7 @@ xfs_cancel_ioend( unlock_buffer(bh); } while ((bh = next_bh) != NULL); - vn_iowake(XFS_I(ioend->io_inode)); + xfs_ioend_wake(XFS_I(ioend->io_inode)); mempool_free(ioend, xfs_ioend_pool); } while ((ioend = next) != NULL); } diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 3ba0631a381..7b26f5ff969 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -43,4 +43,7 @@ typedef struct xfs_ioend { extern const struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); +extern void xfs_ioend_init(void); +extern void xfs_ioend_wait(struct xfs_inode *); + #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4ebbd6820e7..36f6cc703ef 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1822,7 +1822,7 @@ init_xfs_fs(void) XFS_BUILD_OPTIONS " enabled\n"); ktrace_init(64); - vn_init(); + xfs_ioend_init(); xfs_dir_startup(); error = xfs_init_zones(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d12d31b86fa..ca5bd2951a8 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -133,7 +133,7 @@ xfs_sync_inodes_ag( lock_flags |= XFS_IOLOCK_SHARED; error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) - vn_iowait(ip); + xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index a8cf97a4319..f6d14112279 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -32,40 +32,6 @@ #include "xfs_mount.h" -/* - * Dedicated vnode inactive/reclaim sync wait queues. - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t vsync[NVSYNC]; - -void __init -vn_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&vsync[i]); -} - -void -vn_iowait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = vptosync(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -void -vn_iowake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(vptosync(ip)); -} - #ifdef XFS_INODE_TRACE #define KTRACE_ENTER(ip, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 07fed8837db..bd3e05c4790 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -54,16 +54,6 @@ struct attrlist_cursor_kern; Prevent VM access to the pages until the operation completes. */ - -extern void vn_init(void); - -/* - * Yeah, these don't take vnode anymore at all, all this should be - * cleaned up at some point. - */ -extern void vn_iowait(struct xfs_inode *ip); -extern void vn_iowake(struct xfs_inode *ip); - #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e664f57860..063da344e18 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1322,8 +1322,8 @@ xfs_itrunc_trace( * direct I/O with the truncate operation. Also, because we hold * the IOLOCK in exclusive mode, we prevent new direct I/Os from being * started until the truncate completes and drops the lock. Essentially, - * the vn_iowait() call forms an I/O barrier that provides strict ordering - * between direct I/Os and the truncate operation. + * the xfs_ioend_wait() call forms an I/O barrier that provides strict + * ordering between direct I/Os and the truncate operation. * * The flags parameter can have either the value XFS_ITRUNC_DEFINITE * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used @@ -1354,7 +1354,7 @@ xfs_itruncate_start( /* wait for the completion of any pending DIOs */ if (new_size == 0 || new_size < ip->i_size) - vn_iowait(ip); + xfs_ioend_wait(ip); /* * Call toss_pages or flushinval_pages to get rid of pages diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b29a0eb9c0f..2d57aae0e31 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -338,7 +338,7 @@ xfs_setattr( } /* wait for all I/O to complete */ - vn_iowait(ip); + xfs_ioend_wait(ip); if (!code) code = xfs_itruncate_data(ip, iattr->ia_size); @@ -2758,7 +2758,7 @@ xfs_reclaim( return 0; } - vn_iowait(ip); + xfs_ioend_wait(ip); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -3149,7 +3149,8 @@ xfs_free_file_space( need_iolock = 0; if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); - vn_iowait(ip); /* wait for the completion of any pending DIOs */ + /* wait for the completion of any pending DIOs */ + xfs_ioend_wait(ip); } rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); -- cgit v1.2.3 From 5a8d0f3c7af801c7263fbba39952504d6fc7ff60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:40 +0100 Subject: move inode tracing out of xfs_vnode. Move the inode tracing into xfs_iget.c / xfs_inode.h and kill xfs_vnode.c now that it's empty. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi --- fs/xfs/Makefile | 1 - fs/xfs/linux-2.6/xfs_vnode.c | 82 -------------------------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 48 -------------------------- fs/xfs/xfs_iget.c | 48 ++++++++++++++++++++++++++ fs/xfs/xfs_inode.h | 45 ++++++++++++++++++++++++ 5 files changed, 93 insertions(+), 131 deletions(-) delete mode 100644 fs/xfs/linux-2.6/xfs_vnode.c (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a5a9ef0ce84..c3dc491fff8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -107,7 +107,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs_lrw.o \ xfs_super.o \ xfs_sync.o \ - xfs_vnode.o \ xfs_xattr.o) # Objects in support/ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c deleted file mode 100644 index f6d14112279..00000000000 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" - -/* - * And this gunk is needed for xfs_mount.h" - */ -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_dmapi.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_mount.h" - - -#ifdef XFS_INODE_TRACE - -#define KTRACE_ENTER(ip, vk, s, line, ra) \ - ktrace_enter( (ip)->i_trace, \ -/* 0 */ (void *)(__psint_t)(vk), \ -/* 1 */ (void *)(s), \ -/* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ -/* 4 */ (void *)(ra), \ -/* 5 */ NULL, \ -/* 6 */ (void *)(__psint_t)current_cpu(), \ -/* 7 */ (void *)(__psint_t)current_pid(), \ -/* 8 */ (void *)__return_address, \ -/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) - -/* - * Vnode tracing code. - */ -void -_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); -} - -void -_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); -} - -void -xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); -} - -void -_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); -} - -void -xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); -} -#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index bd3e05c4790..f65983a230d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -54,19 +54,6 @@ struct attrlist_cursor_kern; Prevent VM access to the pages until the operation completes. */ -#define IHOLD(ip) \ -do { \ - ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - atomic_inc(&(VFS_I(ip)->i_count)); \ - xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ -} while (0) - -#define IRELE(ip) \ -do { \ - xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ - iput(VFS_I(ip)); \ -} while (0) - /* * Dealing with bad inodes */ @@ -103,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) PAGECACHE_TAG_DIRTY) -/* - * Tracking vnode activity. - */ -#if defined(XFS_INODE_TRACE) - -#define INODE_TRACE_SIZE 16 /* number of trace entries */ -#define INODE_KTRACE_ENTRY 1 -#define INODE_KTRACE_EXIT 2 -#define INODE_KTRACE_HOLD 3 -#define INODE_KTRACE_REF 4 -#define INODE_KTRACE_RELE 5 - -extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); -extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); -extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); -extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); -extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); -#define xfs_itrace_entry(ip) \ - _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit(ip) \ - _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit_tag(ip, tag) \ - _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) -#define xfs_itrace_ref(ip) \ - _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) - -#else -#define xfs_itrace_entry(a) -#define xfs_itrace_exit(a) -#define xfs_itrace_exit_tag(a, b) -#define xfs_itrace_hold(a, b, c, d) -#define xfs_itrace_ref(a) -#define xfs_itrace_rele(a, b, c, d) -#endif - #endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 82eb8ed5633..e2fb6210d4c 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -805,3 +805,51 @@ xfs_isilocked( } #endif +#ifdef XFS_INODE_TRACE + +#define KTRACE_ENTER(ip, vk, s, line, ra) \ + ktrace_enter((ip)->i_trace, \ +/* 0 */ (void *)(__psint_t)(vk), \ +/* 1 */ (void *)(s), \ +/* 2 */ (void *)(__psint_t) line, \ +/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ +/* 4 */ (void *)(ra), \ +/* 5 */ NULL, \ +/* 6 */ (void *)(__psint_t)current_cpu(), \ +/* 7 */ (void *)(__psint_t)current_pid(), \ +/* 8 */ (void *)__return_address, \ +/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) + +/* + * Vnode tracing code. + */ +void +_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); +} + +void +_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); +} + +void +xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); +} + +void +_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); +} + +void +xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); +} +#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ddac1b806c0..558253e6fb4 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -536,6 +536,51 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); +#if defined(XFS_INODE_TRACE) + +#define INODE_TRACE_SIZE 16 /* number of trace entries */ +#define INODE_KTRACE_ENTRY 1 +#define INODE_KTRACE_EXIT 2 +#define INODE_KTRACE_HOLD 3 +#define INODE_KTRACE_REF 4 +#define INODE_KTRACE_RELE 5 + +extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); +extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); +extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); +extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); +extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); +#define xfs_itrace_entry(ip) \ + _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) +#define xfs_itrace_exit(ip) \ + _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) +#define xfs_itrace_exit_tag(ip, tag) \ + _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) +#define xfs_itrace_ref(ip) \ + _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) + +#else +#define xfs_itrace_entry(a) +#define xfs_itrace_exit(a) +#define xfs_itrace_exit_tag(a, b) +#define xfs_itrace_hold(a, b, c, d) +#define xfs_itrace_ref(a) +#define xfs_itrace_rele(a, b, c, d) +#endif + +#define IHOLD(ip) \ +do { \ + ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ + atomic_inc(&(VFS_I(ip)->i_count)); \ + xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ +} while (0) + +#define IRELE(ip) \ +do { \ + xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ + iput(VFS_I(ip)); \ +} while (0) + #endif /* __KERNEL__ */ int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, -- cgit v1.2.3 From 8bb57320f3f5dd8c2373c0b66e4950391e037109 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Dec 2008 14:23:27 +0100 Subject: [XFS] Fix compile with CONFIG_COMPAT enabled Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index a3446aad070..d92131c827b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -74,7 +74,7 @@ xfs_file_compat_ioctl( unsigned long arg); extern long -xfs_file_compat_ioctl_invis( +xfs_file_compat_invis_ioctl( struct file *file, unsigned int cmd, unsigned long arg); -- cgit v1.2.3 From 6a0775a991d5597ce98f1e15373288ea133cc793 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2008 09:09:34 +1100 Subject: [XFS] Fix hang after disallowed rename across directory quota domains When project quota is active and is being used for directory tree quota control, we disallow rename outside the current directory tree. This requires a check to be made after all the inodes involved in the rename are locked. We fail to unlock the inodes correctly if we disallow the rename when the target is outside the current directory tree. This results in a hang on the next access to the inodes involved in failed rename. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Dave Chinner Tested-by: Arkadiusz Miskiewicz Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_rename.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 02f0e8f53a9..10642fcbb1f 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -212,7 +212,7 @@ xfs_rename( if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, cancel_flags); goto std_return; } -- cgit v1.2.3 From c6422617a1c0d7787e515748b01f594fe43aea98 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 13:16:15 +1100 Subject: [XFS] Check return value of xfs_buf_get_noaddr() We check the return value of all other calls to xfs_buf_get_noaddr(). Make sense to do it here too. Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/xfs/xfs_vnodeops.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 2d57aae0e31..4547608b46c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3034,6 +3034,8 @@ xfs_zero_remaining_bytes( bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp); + if (!bp) + return XFS_ERROR(ENOMEM); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); -- cgit v1.2.3 From a5b429d41fede3a90deb532f5c2318393ed3a17b Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 13:31:51 +1100 Subject: [XFS] Remove unused variable in ktrace_free() entries_size is probably left over from when we used to pass the size to kmem_free(). Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/xfs/support/ktrace.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index a34ef05489b..2d494c26717 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -113,21 +113,16 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) void ktrace_free(ktrace_t *ktp) { - int entries_size; - if (ktp == (ktrace_t *)NULL) return; /* * Special treatment for the Vnode trace buffer. */ - if (ktp->kt_nentries == ktrace_zentries) { + if (ktp->kt_nentries == ktrace_zentries) kmem_zone_free(ktrace_ent_zone, ktp->kt_entries); - } else { - entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); - + else kmem_free(ktp->kt_entries); - } kmem_zone_free(ktrace_hdr_zone, ktp); } -- cgit v1.2.3 From 797eaed40e1df4a3b9ece6894a71ce2b568bca38 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 14:15:49 +1100 Subject: [XFS] Remove unnecessary assertion Hit this assert because an inode was tagged with XFS_ICI_RECLAIM_TAG but not XFS_IRECLAIMABLE|XFS_IRECLAIM. This is because xfs_iget_cache_hit() first clears XFS_IRECLAIMABLE and then calls __xfs_inode_clear_reclaim_tag() while only holding the pag_ici_lock in read mode so we can race with xfs_reclaim_inodes_ag(). Looks like xfs_reclaim_inodes_ag() will do the right thing anyway so just remove the assert. Thanks to Christoph for pointing out where the problem was. Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ca5bd2951a8..2ed035354c2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -707,8 +707,6 @@ restart: break; } - ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM))); - /* ignore if already under reclaim */ if (xfs_iflags_test(ip, XFS_IRECLAIM)) { read_unlock(&pag->pag_ici_lock); -- cgit v1.2.3 From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:31 -0800 Subject: sparse irq_desc[] array: core kernel and x86 changes Impact: new feature Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with NR_CPUS set to large values. The goal is to be able to scale up to much larger NR_IRQS value without impacting the (important) common case. To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of irq_desc pointers. When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc, this also makes the IRQ descriptors NUMA-local (to the site that calls request_irq()). This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now uses desc->chip_data for x86 to store irq_cfg. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 81904f07679..a13431ab7c6 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,6 +27,7 @@ static int show_stat(struct seq_file *p, void *v) u64 sum = 0; struct timespec boottime; unsigned int per_irq_sum; + struct irq_desc *desc; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -44,10 +45,11 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - - for_each_irq_nr(j) + for_each_irq_desc(j, desc) { + if (!desc) + continue; sum += kstat_irqs_cpu(j, i); - + } sum += arch_irq_stat_cpu(i); } sum += arch_irq_stat(); @@ -90,11 +92,14 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { + for (j = 0; j < NR_IRQS; j++) { + desc = irq_to_desc(j); per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); + if (desc) { + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); + } seq_printf(p, " %u", per_irq_sum); } -- cgit v1.2.3 From 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 8 Dec 2008 14:06:17 -0800 Subject: sparseirq: fix Alpha build failure Impact: build fix on Alpha -tip testing found this build failure on the Alpha defconfig: /home/mingo/tip/fs/proc/stat.c: In function 'show_stat': /home/mingo/tip/fs/proc/stat.c:48: error: implicit declaration of function 'for_each_irq_desc' /home/mingo/tip/fs/proc/stat.c:48: error: expected ';' before '{' token can not use irq_desc() in stat.c on older architectures. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index a13431ab7c6..3cb9492801c 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -45,9 +45,12 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - for_each_irq_desc(j, desc) { + for_each_irq_nr(j) { +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(j); if (!desc) continue; +#endif sum += kstat_irqs_cpu(j, i); } sum += arch_irq_stat_cpu(i); @@ -92,14 +95,17 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for (j = 0; j < NR_IRQS; j++) { - desc = irq_to_desc(j); + for_each_irq_nr(j) { per_irq_sum = 0; - - if (desc) { - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(j); + if (!desc) { + seq_printf(p, " %u", per_irq_sum); + continue; } +#endif + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); seq_printf(p, " %u", per_irq_sum); } -- cgit v1.2.3 From e055f13a6d8448d4f23121b7b11340c3fb55cce6 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Wed, 10 Dec 2008 11:51:54 +1100 Subject: [XFS] Remove unused tracing code None of this code appears to be used anywhere so remove it. Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_inode.c | 2 -- fs/xfs/xfs_inode.h | 1 - 2 files changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 063da344e18..f6b4c0f5bfd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3232,8 +3232,6 @@ corrupt_out: #ifdef XFS_ILOCK_TRACE -ktrace_t *xfs_ilock_trace_buf; - void xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 558253e6fb4..64222e266df 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -223,7 +223,6 @@ struct xfs_dquot; #if defined(XFS_ILOCK_TRACE) #define XFS_ILOCK_KTRACE_SIZE 32 -extern ktrace_t *xfs_ilock_trace_buf; extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); #else #define xfs_ilock_trace(i,n,f,ra) -- cgit v1.2.3 From 15ac08a8b2c129abccf1be47b6ab09491e013db2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:30 -0500 Subject: [XFS] replace b_fspriv with b_mount Replace the b_fspriv pointer and it's ugly accessors with a properly types xfs_mount pointer. Also switch log reocvery over to it instead of using b_fspriv for the mount pointer. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- fs/xfs/linux-2.6/xfs_buf.h | 4 +--- fs/xfs/linux-2.6/xfs_lrw.c | 11 ++++------- fs/xfs/xfs_buf_item.c | 4 ++-- fs/xfs/xfs_log_recover.c | 26 +++++++++----------------- fs/xfs/xfs_rw.c | 2 +- 6 files changed, 19 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cd89c56715a..b9c304a629e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1085,7 +1085,7 @@ xfs_bawrite( bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); - bp->b_fspriv3 = mp; + bp->b_mount = mp; bp->b_strat = xfs_bdstrat_cb; return xfs_bdstrat_cb(bp); } @@ -1098,7 +1098,7 @@ xfs_bdwrite( XB_TRACE(bp, "bdwrite", 0); bp->b_strat = xfs_bdstrat_cb; - bp->b_fspriv3 = mp; + bp->b_mount = mp; bp->b_flags &= ~XBF_READ; bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 0e2aa16f5e4..af8764e02d7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -168,7 +168,7 @@ typedef struct xfs_buf { struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; - void *b_fspriv3; + struct xfs_mount *b_mount; unsigned short b_error; /* error code on I/O */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ @@ -335,8 +335,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) -#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3) -#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 59b7d5f9e64..92ce34b3787 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -847,13 +847,7 @@ retry: int xfs_bdstrat_cb(struct xfs_buf *bp) { - xfs_mount_t *mp; - - mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return 0; - } else { + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buftrace("XFS__BDSTRAT IOERROR", bp); /* * Metadata write that didn't get logged but @@ -866,6 +860,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp) else return (xfs_bioerror(bp)); } + + xfs_buf_iorequest(bp); + return 0; } /* diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d245d04e10c..d74ce113426 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -707,8 +707,8 @@ xfs_buf_item_init( * the first. If we do already have one, there is * nothing to do here so return. */ - if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp) - XFS_BUF_SET_FSPRIVATE3(bp, mp); + if (bp->b_mount != mp) + bp->b_mount = mp; XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 51412cced01..35cca98bd94 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -267,21 +267,16 @@ STATIC void xlog_recover_iodone( struct xfs_buf *bp) { - xfs_mount_t *mp; - - ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); - if (XFS_BUF_GETERROR(bp)) { /* * We're not going to bother about retrying * this during recovery. One strike! */ - mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); xfs_ioerror_alert("xlog_recover_iodone", - mp, bp, XFS_BUF_ADDR(bp)); - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + bp->b_mount, bp, XFS_BUF_ADDR(bp)); + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); } - XFS_BUF_SET_FSPRIVATE(bp, NULL); + bp->b_mount = NULL; XFS_BUF_CLR_IODONE_FUNC(bp); xfs_biodone(bp); } @@ -2225,9 +2220,8 @@ xlog_recover_do_buffer_trans( XFS_BUF_STALE(bp); error = xfs_bwrite(mp, bp); } else { - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2490,9 +2484,8 @@ xlog_recover_do_inode_trans( write_inode_buffer: if (ITEM_TYPE(item) == XFS_LI_INODE) { - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } else { @@ -2623,9 +2616,8 @@ xlog_recover_do_dquot_trans( memcpy(ddq, recddq, item->ri_buf[1].i_len); ASSERT(dq_f->qlf_size == 2); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 3a82576dde9..36f3a21c54d 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -406,7 +406,7 @@ xfs_bwrite( * XXXsup how does this work for quotas. */ XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); - XFS_BUF_SET_FSPRIVATE3(bp, mp); + bp->b_mount = mp; XFS_BUF_WRITE(bp); if ((error = XFS_bwrite(bp))) { -- cgit v1.2.3 From 2175dd95741bda5f438e4efe388a8c1bb5abf1cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:31 -0500 Subject: [XFS] simplify projid check in xfs_rename Check for the project ID after attaching all inodes to the transaction. That way the unlock in the error case is done by the transaction subsystem, which guaratees that is uses the right flags (which was wrong from day one of this check), and avoids having special code unlocking an array of inodes with potential duplicates. Attaching the inode first is the method used by xfs_rename and the other namespace methods all other error that require multiple locked inodes. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_rename.c | 49 +++++++++++-------------------------------------- 1 file changed, 11 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 10642fcbb1f..86471bb40fd 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -41,31 +41,6 @@ #include "xfs_vnodeops.h" -/* - * Given an array of up to 4 inode pointers, unlock the pointed to inodes. - * If there are fewer than 4 entries in the array, the empty entries will - * be at the end and will have NULL pointers in them. - */ -STATIC void -xfs_rename_unlock4( - xfs_inode_t **i_tab, - uint lock_mode) -{ - int i; - - xfs_iunlock(i_tab[0], lock_mode); - for (i = 1; i < 4; i++) { - if (i_tab[i] == NULL) - break; - - /* - * Watch out for duplicate entries in the table. - */ - if (i_tab[i] != i_tab[i-1]) - xfs_iunlock(i_tab[i], lock_mode); - } -} - /* * Enter all inodes for a rename transaction into a sorted array. */ @@ -204,19 +179,6 @@ xfs_rename( */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); - /* - * If we are using project inheritance, we only allow renames - * into our tree when the project IDs are the same; else the - * tree quota mechanism would be circumvented. - */ - if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { - error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } - /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock @@ -241,6 +203,17 @@ xfs_rename( xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } + /* + * If we are using project inheritance, we only allow renames + * into our tree when the project IDs are the same; else the + * tree quota mechanism would be circumvented. + */ + if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && + (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { + error = XFS_ERROR(EXDEV); + goto error_return; + } + /* * Set up the target. */ -- cgit v1.2.3 From 6d73cf133c5477f7038577bfeda603ce9946f8cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:32 -0500 Subject: [XFS] resync headers with libxfs - xfs_sb.h add the XFS_SB_VERSION2_PARENTBIT features2 that has been around in userspace for some time - xfs_inode.h: move a few things out of __KERNEL__ that are needed by userspace - xfs_mount.h: only include xfs_sync.h under __KERNEL__ - xfs_inode.c: minor whitespace fixup. I accidentaly changes this when importing this file for use by userspace. Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_inode.h | 16 ++++++++-------- fs/xfs/xfs_mount.h | 4 ++-- fs/xfs/xfs_sb.h | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f6b4c0f5bfd..3adb868df18 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -870,7 +870,7 @@ xfs_iread( * around for a while. This helps to keep recently accessed * meta-data in-core longer. */ - XFS_BUF_SET_REF(bp, XFS_INO_REF); + XFS_BUF_SET_REF(bp, XFS_INO_REF); /* * Use xfs_trans_brelse() to release the buffer containing the diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 64222e266df..f0e4d79833e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -481,12 +481,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ ((pip)->i_d.di_mode & S_ISGID)) -/* - * Flags for xfs_iget() - */ -#define XFS_IGET_CREATE 0x1 -#define XFS_IGET_BULKSTAT 0x2 - /* * xfs_iget.c prototypes. */ @@ -508,8 +502,6 @@ void xfs_ireclaim(xfs_inode_t *); /* * xfs_inode.c prototypes. */ -int xfs_iread(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, xfs_daddr_t, uint); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); @@ -582,12 +574,20 @@ do { \ #endif /* __KERNEL__ */ +/* + * Flags for xfs_iget() + */ +#define XFS_IGET_CREATE 0x1 +#define XFS_IGET_BULKSTAT 0x2 + int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, struct xfs_dinode **, struct xfs_buf **, int *, uint); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, uint); +int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, xfs_daddr_t, uint); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode *); void xfs_dinode_to_disk(struct xfs_dinode *, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index ae5da88ace2..c1e02846732 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -18,8 +18,6 @@ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ -#include "xfs_sync.h" - typedef struct xfs_trans_reservations { uint tr_write; /* extent alloc trans */ uint tr_itruncate; /* truncate trans */ @@ -53,6 +51,8 @@ typedef struct xfs_trans_reservations { #else /* __KERNEL__ */ +#include "xfs_sync.h" + struct cred; struct log; struct xfs_mount_args; diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index e123c1499b4..1ed71916e4c 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -79,6 +79,7 @@ struct xfs_mount; #define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ +#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ #define XFS_SB_VERSION2_OKREALFBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ -- cgit v1.2.3 From 4d4be482a4d78ca906f45e99fd9fdb91e907f5ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:33 -0500 Subject: [XFS] add a FMODE flag to make XFS invisible I/O less hacky XFS has a mode called invisble I/O that doesn't update any of the timestamps. It's used for HSM-style applications and exposed through the nasty open by handle ioctl. Instead of doing directly assignment of file operations that set an internal flag for it add a new FMODE_NOCMTIME flag that we can check in the normal file operations. (addition of the generic VFS flag has been ACKed by Al as an interims solution) Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_file.c | 145 ++++++----------------------------------- fs/xfs/linux-2.6/xfs_ioctl.c | 29 ++++++--- fs/xfs/linux-2.6/xfs_ioctl.h | 8 +-- fs/xfs/linux-2.6/xfs_ioctl32.c | 56 ++++++---------- fs/xfs/linux-2.6/xfs_iops.h | 1 - fs/xfs/xfs_vnodeops.h | 2 - 6 files changed, 63 insertions(+), 178 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f999d20a429..a0c45cc8a6b 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -45,80 +45,44 @@ static struct vm_operations_struct xfs_file_vm_ops; -STATIC_INLINE ssize_t -__xfs_file_read( +STATIC ssize_t +xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } STATIC ssize_t -xfs_file_aio_read( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_read_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - -STATIC_INLINE ssize_t -__xfs_file_write( +xfs_file_aio_write( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { - struct file *file = iocb->ki_filp; + struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } -STATIC ssize_t -xfs_file_aio_write( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_write_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - STATIC ssize_t xfs_file_splice_read( struct file *infilp, @@ -127,20 +91,13 @@ xfs_file_splice_read( size_t len, unsigned int flags) { - return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, 0); -} + int ioflags = 0; + + if (infilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_read_invis( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags) -{ return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, IO_INVIS); + infilp, ppos, pipe, len, flags, ioflags); } STATIC ssize_t @@ -151,20 +108,13 @@ xfs_file_splice_write( size_t len, unsigned int flags) { - return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, 0); -} + int ioflags = 0; + + if (outfilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_write_invis( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t len, - unsigned int flags) -{ return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, IO_INVIS); + pipe, outfilp, ppos, len, flags, ioflags); } STATIC int @@ -275,42 +225,6 @@ xfs_file_mmap( return 0; } -STATIC long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); -} - -STATIC long -xfs_file_ioctl_invis( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); -} - /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable @@ -346,25 +260,6 @@ const struct file_operations xfs_file_operations = { #endif }; -const struct file_operations xfs_invis_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read_invis, - .aio_write = xfs_file_aio_write_invis, - .splice_read = xfs_file_splice_read_invis, - .splice_write = xfs_file_splice_write_invis, - .unlocked_ioctl = xfs_file_ioctl_invis, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_invis_ioctl, -#endif - .mmap = xfs_file_mmap, - .open = xfs_file_open, - .release = xfs_file_release, - .fsync = xfs_file_fsync, -}; - - const struct file_operations xfs_dir_file_operations = { .open = xfs_dir_open, .read = generic_read_dir, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index c8f1e632ba9..0264c8719ff 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -319,10 +319,11 @@ xfs_open_by_handle( put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); } + if (inode->i_mode & S_IFREG) { /* invisible operation should not change atime */ filp->f_flags |= O_NOATIME; - filp->f_op = &xfs_invis_file_operations; + filp->f_mode |= FMODE_NOCMTIME; } fd_install(new_fd, filp); @@ -1328,21 +1329,31 @@ xfs_ioc_getbmapx( return 0; } -int -xfs_ioctl( - xfs_inode_t *ip, +/* + * Note: some of the ioctl's return positive numbers as a + * byte count indicating success, such as readlink_by_handle. + * So we don't "sign flip" like most other routines. This means + * true errors need to be returned as a negative value. + */ +long +xfs_file_ioctl( struct file *filp, - int ioflags, unsigned int cmd, - void __user *arg) + unsigned long p) { struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; int error; - xfs_itrace_entry(XFS_I(inode)); - switch (cmd) { + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + xfs_itrace_entry(ip); + + switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index d92131c827b..8c16bf2d7e0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -68,13 +68,13 @@ xfs_attrmulti_attr_remove( __uint32_t flags); extern long -xfs_file_compat_ioctl( - struct file *file, +xfs_file_ioctl( + struct file *filp, unsigned int cmd, - unsigned long arg); + unsigned long p); extern long -xfs_file_compat_invis_ioctl( +xfs_file_compat_ioctl( struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b34b3d8892a..0504cece9f6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -599,19 +599,24 @@ out: return error; } -STATIC long -xfs_compat_ioctl( - xfs_inode_t *ip, - struct file *filp, - int ioflags, - unsigned cmd, - void __user *arg) +long +xfs_file_compat_ioctl( + struct file *filp, + unsigned cmd, + unsigned long p) { - struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; - int error; + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + xfs_itrace_entry(ip); - xfs_itrace_entry(XFS_I(inode)); switch (cmd) { /* No size or alignment issues on any arch */ case XFS_IOC_DIOINFO: @@ -632,7 +637,7 @@ xfs_compat_ioctl( case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); #ifndef BROKEN_X86_ALIGNMENT /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: @@ -646,7 +651,7 @@ xfs_compat_ioctl( case XFS_IOC_FSGEOMETRY_V1: case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSRT: - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); #else case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: @@ -687,7 +692,7 @@ xfs_compat_ioctl( case XFS_IOC_SETXFLAGS_32: case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); case XFS_IOC_SWAPEXT: { struct xfs_swapext sxp; struct compat_xfs_swapext __user *sxu = arg; @@ -738,26 +743,3 @@ xfs_compat_ioctl( return -XFS_ERROR(ENOIOCTLCMD); } } - -long -xfs_file_compat_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - return xfs_compat_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); -} - -long -xfs_file_compat_invis_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - return xfs_compat_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, - (void __user *)p); -} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index 8b1a1e31dc2..ef41c92ce66 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -22,7 +22,6 @@ struct xfs_inode; extern const struct file_operations xfs_file_operations; extern const struct file_operations xfs_dir_file_operations; -extern const struct file_operations xfs_invis_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 2a45b00ad32..55d955ec4ec 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -53,8 +53,6 @@ int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int flags, struct attrlist_cursor_kern *cursor); -int xfs_ioctl(struct xfs_inode *ip, struct file *filp, - int ioflags, unsigned int cmd, void __user *arg); ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags); -- cgit v1.2.3 From c4cd747ee6c3ba1e7727878e3fce482d0d8c0136 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:34 -0500 Subject: [XFS] use inode_change_ok for setattr permission checking Instead of implementing our own checks use inode_change_ok to check for necessary permission in setattr. There is a slight change in behaviour as inode_change_ok doesn't allow i_mode updates to add the suid or sgid without superuser privilegues while the old XFS code just stripped away those bits from the file mode. (First sent on Semptember 29th) Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_vnodeops.c | 149 ++++++++++++-------------------------------------- 1 file changed, 36 insertions(+), 113 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4547608b46c..f07bf8768c3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -70,7 +70,6 @@ xfs_setattr( gid_t gid=0, igid=0; int timeflags = 0; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; - int file_owner; int need_iolock = 1; xfs_itrace_entry(ip); @@ -81,6 +80,10 @@ xfs_setattr( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + code = -inode_change_ok(inode, iattr); + if (code) + return code; + olddquot1 = olddquot2 = NULL; udqp = gdqp = NULL; @@ -158,56 +161,6 @@ xfs_setattr( xfs_ilock(ip, lock_flags); - /* boolean: are we the file owner? */ - file_owner = (current_fsuid() == ip->i_d.di_uid); - - /* - * Change various properties of a file. - * Only the owner or users with CAP_FOWNER - * capability may do these things. - */ - if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) { - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal - * to the file owner ID, except in cases where the - * CAP_FSETID capability is applicable. - */ - if (!file_owner && !capable(CAP_FOWNER)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - - /* - * CAP_FSETID overrides the following restrictions: - * - * The effective user ID of the calling process shall match - * the file owner when setting the set-user-ID and - * set-group-ID bits on that file. - * - * The effective group ID or one of the supplementary group - * IDs of the calling process shall match the group owner of - * the file when setting the set-group-ID bit on that file - */ - if (mask & ATTR_MODE) { - mode_t m = 0; - - if ((iattr->ia_mode & S_ISUID) && !file_owner) - m |= S_ISUID; - if ((iattr->ia_mode & S_ISGID) && - !in_group_p((gid_t)ip->i_d.di_gid)) - m |= S_ISGID; -#if 0 - /* Linux allows this, Irix doesn't. */ - if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) - m |= S_ISVTX; -#endif - if (m && !capable(CAP_FSETID)) - iattr->ia_mode &= ~m; - } - } - /* * Change file ownership. Must be the owner or privileged. */ @@ -223,22 +176,6 @@ xfs_setattr( gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - /* - * CAP_CHOWN overrides the following restrictions: - * - * If _POSIX_CHOWN_RESTRICTED is defined, this capability - * shall override the restriction that a process cannot - * change the user ID of a file it owns and the restriction - * that the group ID supplied to the chown() function - * shall be equal to either the group ID or one of the - * supplementary group IDs of the calling process. - */ - if ((iuid != uid || - (igid != gid && !in_group_p((gid_t)gid))) && - !capable(CAP_CHOWN)) { - code = XFS_ERROR(EPERM); - goto error_return; - } /* * Do a quota reservation only if uid/gid is actually * going to change. @@ -276,36 +213,22 @@ xfs_setattr( code = XFS_ERROR(EINVAL); goto error_return; } + /* * Make sure that the dquots are attached to the inode. */ - if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED))) + code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + if (code) goto error_return; - } - /* - * Change file access or modified times. - */ - if (mask & (ATTR_ATIME|ATTR_MTIME)) { - if (!file_owner) { - if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) && - !capable(CAP_FOWNER)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - } - } - - /* - * Now we can make the changes. Before we join the inode - * to the transaction, if ATTR_SIZE is set then take care of - * the part of the truncation that must be done without the - * inode lock. This needs to be done before joining the inode - * to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (mask & ATTR_SIZE) { - code = 0; + /* + * Now we can make the changes. Before we join the inode + * to the transaction, if ATTR_SIZE is set then take care of + * the part of the truncation that must be done without the + * inode lock. This needs to be done before joining the inode + * to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ if (iattr->ia_size > ip->i_size) { /* * Do the first part of growing a file: zero any data @@ -360,17 +283,10 @@ xfs_setattr( } commit_flags = XFS_TRANS_RELEASE_LOG_RES; xfs_ilock(ip, XFS_ILOCK_EXCL); - } - if (tp) { xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ihold(tp, ip); - } - /* - * Truncate file. Must have write permission and not be a directory. - */ - if (mask & ATTR_SIZE) { /* * Only change the c/mtime if we are changing the size * or we are explicitly asked to change it. This handles @@ -410,20 +326,9 @@ xfs_setattr( */ xfs_iflags_set(ip, XFS_ITRUNCATED); } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= iattr->ia_mode & ~S_IFMT; - - xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); - timeflags |= XFS_ICHGTIME_CHG; + } else if (tp) { + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ihold(tp, ip); } /* @@ -471,6 +376,24 @@ xfs_setattr( timeflags |= XFS_ICHGTIME_CHG; } + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + timeflags |= XFS_ICHGTIME_CHG; + } /* * Change file access or modified times. -- cgit v1.2.3 From cfbe52672fbc6f333892e8dde82c35e0a76aa5f5 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 12 Dec 2008 15:27:25 +1100 Subject: [XFS] set b_error from bio error in xfs_buf_bio_end_io Preserve any error returned by the bio layer. Reviewed-by: Eric Sandeen Reviewed-by: Tim Shimmin Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cd89c56715a..aae8d0768ec 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1126,8 +1126,7 @@ xfs_buf_bio_end_io( unsigned int blocksize = bp->b_target->bt_bsize; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - bp->b_error = EIO; + xfs_buf_ioerror(bp, -error); do { struct page *page = bvec->bv_page; -- cgit v1.2.3 From 8187926bdae98648db24db3391c4efd21ec669b1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Dec 2008 14:18:16 -0800 Subject: posix-timers: simplify de_thread()->exit_itimers() path Impact: simplify code de_thread() postpones release_task(leader) until after exit_itimers(). This was needed because !SIGEV_THREAD_ID timers could use ->group_leader without get_task_struct(). With the recent changes we can release the leader earlier and simplify the code. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- fs/exec.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index ec5df9a3831..b4e5b8a9216 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -772,7 +772,6 @@ static int de_thread(struct task_struct *tsk) struct signal_struct *sig = tsk->signal; struct sighand_struct *oldsighand = tsk->sighand; spinlock_t *lock = &oldsighand->siglock; - struct task_struct *leader = NULL; int count; if (thread_group_empty(tsk)) @@ -810,7 +809,7 @@ static int de_thread(struct task_struct *tsk) * and to assume its PID: */ if (!thread_group_leader(tsk)) { - leader = tsk->group_leader; + struct task_struct *leader = tsk->group_leader; sig->notify_count = -1; /* for exit_notify() */ for (;;) { @@ -862,8 +861,9 @@ static int de_thread(struct task_struct *tsk) BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; - write_unlock_irq(&tasklist_lock); + + release_task(leader); } sig->group_exit_task = NULL; @@ -872,8 +872,6 @@ static int de_thread(struct task_struct *tsk) no_thread_group: exit_itimers(sig); flush_itimer_signals(); - if (leader) - release_task(leader); if (atomic_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; -- cgit v1.2.3 From 13bd41bc227a48d6cf8992a3286bf6eba3c71a0c Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 16 Dec 2008 00:23:34 -0800 Subject: proc: enclose desc variable of show_stat() in CONFIG_SPARSE_IRQ Impact: restructure code to fix compiler warning commit 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 moved desc usage point into #ifdef CONFIG_SPARSE_IRQ. Eliminate the desc variable, otherwise following warning happens: fs/proc/stat.c: In function 'show_stat': fs/proc/stat.c:31: warning: unused variable 'desc' [ akpm: cleaned up the patch to remove #ifdef ] Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 3cb9492801c..3bb1cf1e742 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,7 +27,6 @@ static int show_stat(struct seq_file *p, void *v) u64 sum = 0; struct timespec boottime; unsigned int per_irq_sum; - struct irq_desc *desc; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -47,8 +46,7 @@ static int show_stat(struct seq_file *p, void *v) guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); for_each_irq_nr(j) { #ifdef CONFIG_SPARSE_IRQ - desc = irq_to_desc(j); - if (!desc) + if (!irq_to_desc(j)) continue; #endif sum += kstat_irqs_cpu(j, i); @@ -98,8 +96,7 @@ static int show_stat(struct seq_file *p, void *v) for_each_irq_nr(j) { per_irq_sum = 0; #ifdef CONFIG_SPARSE_IRQ - desc = irq_to_desc(j); - if (!desc) { + if (!irq_to_desc(j)) { seq_printf(p, " %u", per_irq_sum); continue; } -- cgit v1.2.3 From d69e83d99cf87e3328c47bb54684360e32aef17d Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 16 Dec 2008 10:21:34 -0600 Subject: jfs: ensure symlinks are NUL-terminated This is an alternate fix for a bug reported and fixed by Duane Griffin. Signed-off-by: Dave Kleikamp Reported-by: Duane Griffin --- fs/jfs/inode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 210339784b5..b00ee9f05a0 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -59,8 +59,14 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino) if (inode->i_size >= IDATASIZE) { inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &jfs_aops; - } else + } else { inode->i_op = &jfs_symlink_inode_operations; + /* + * The inline data should be null-terminated, but + * don't let on-disk corruption crash the kernel + */ + JFS_IP(inode)->i_inline[inode->i_size] = '\0'; + } } else { inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); -- cgit v1.2.3 From d415867e0abc35e3b2f0d4196e98c339d6fe29a2 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Mon, 22 Dec 2008 17:50:56 +1100 Subject: [XFS] Use the incore inode size in xfs_file_readdir() We should be using the incore inode size here not the linux inode size. The incore inode size is always up to date for directories whereas the linux inode size is not updated for directories. We've hit assertions in xfs_bmap() and traced it back to the linux inode size being zero but the incore size being correct. Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index a0c45cc8a6b..e14c4e3aea0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -204,7 +204,7 @@ xfs_file_readdir( * point we can change the ->readdir prototype to include the * buffer size. */ - bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size); + bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size); error = xfs_readdir(ip, dirent, bufsize, (xfs_off_t *)&filp->f_pos, filldir); -- cgit v1.2.3 From 4fdc7781799926dca6c3a3bb6e9533a9718c4dea Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Mon, 22 Dec 2008 17:52:58 +1100 Subject: [XFS] Remove XFS_BUF_SHUT() and friends Code does nothing so remove it. Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.h | 4 ---- fs/xfs/xfs_buf_item.c | 16 +--------------- fs/xfs/xfs_inode.c | 1 - 3 files changed, 1 insertion(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index af8764e02d7..288ae7c4c80 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -312,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) #define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) -#define XFS_BUF_SHUT(bp) do { } while (0) -#define XFS_BUF_UNSHUT(bp) do { } while (0) -#define XFS_BUF_ISSHUT(bp) (0) - #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d74ce113426..92af4098c7e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -998,21 +998,7 @@ xfs_buf_iodone_callbacks( xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); - - /* - * XFS_SHUT flag gets set when we go thru the - * entire buffer cache and deliberately start - * throwing away delayed write buffers. - * Since there's no biowait done on those, - * we should just brelse them. - */ - if (XFS_BUF_ISSHUT(bp)) { - XFS_BUF_UNSHUT(bp); - xfs_buf_relse(bp); - } else { - xfs_biodone(bp); - } - + xfs_biodone(bp); return; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3adb868df18..5a5e035e5d3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2804,7 +2804,6 @@ cluster_corrupt_out: XFS_BUF_CLR_BDSTRAT_FUNC(bp); XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); - XFS_BUF_SHUT(bp); XFS_BUF_ERROR(bp,EIO); xfs_biodone(bp); } else { -- cgit v1.2.3 From 9f6c92b9cc2fd41d6c7b493be5637cc5b5659880 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Mon, 22 Dec 2008 17:56:49 +1100 Subject: [XFS] Fix speculative allocation beyond eof Speculative allocation beyond eof doesn't work properly. It was broken some time ago after a code cleanup that moved what is now xfs_iomap_eof_align_last_fsb() and xfs_iomap_eof_want_preallocate() out of xfs_iomap_write_delay() into separate functions. The code used to use the current file size in various checks but got changed to be max(file_size, i_new_size). Since i_new_size is the result of 'offset + count' then in xfs_iomap_eof_want_preallocate() the check for '(offset + count) <= isize' will always be true. ie if 'offset + count' is > ip->i_size then isize will be i_new_size and equal to 'offset + count'. This change fixes all the places that used to use the current file size. Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_iomap.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 67f22b2b44b..911062cf73a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -290,7 +290,6 @@ STATIC int xfs_iomap_eof_align_last_fsb( xfs_mount_t *mp, xfs_inode_t *ip, - xfs_fsize_t isize, xfs_extlen_t extsize, xfs_fileoff_t *last_fsb) { @@ -306,14 +305,14 @@ xfs_iomap_eof_align_last_fsb( * stripe width and we are allocating past the allocation eof. */ else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) && - (isize >= XFS_FSB_TO_B(mp, mp->m_swidth))) + (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth))) new_last_fsb = roundup_64(*last_fsb, mp->m_swidth); /* * Roundup the allocation request to a stripe unit (m_dalign) boundary * if the file size is >= stripe unit size, and we are allocating past * the allocation eof. */ - else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign))) + else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign))) new_last_fsb = roundup_64(*last_fsb, mp->m_dalign); /* @@ -403,7 +402,6 @@ xfs_iomap_write_direct( xfs_filblks_t count_fsb, resaligned; xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; - xfs_fsize_t isize; int nimaps; int bmapi_flag; int quota_flag; @@ -426,15 +424,10 @@ xfs_iomap_write_direct( rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); - isize = ip->i_size; - if (ip->i_new_size > isize) - isize = ip->i_new_size; - offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); - if ((offset + count) > isize) { - error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, - &last_fsb); + if ((offset + count) > ip->i_size) { + error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) goto error_out; } else { @@ -559,7 +552,6 @@ STATIC int xfs_iomap_eof_want_preallocate( xfs_mount_t *mp, xfs_inode_t *ip, - xfs_fsize_t isize, xfs_off_t offset, size_t count, int ioflag, @@ -573,7 +565,7 @@ xfs_iomap_eof_want_preallocate( int n, error, imaps; *prealloc = 0; - if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize) + if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) return 0; /* @@ -617,7 +609,6 @@ xfs_iomap_write_delay( xfs_fileoff_t ioalign; xfs_fsblock_t firstblock; xfs_extlen_t extsz; - xfs_fsize_t isize; int nimaps; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; int prealloc, fsynced = 0; @@ -637,11 +628,7 @@ xfs_iomap_write_delay( offset_fsb = XFS_B_TO_FSBT(mp, offset); retry: - isize = ip->i_size; - if (ip->i_new_size > isize) - isize = ip->i_new_size; - - error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count, + error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; @@ -655,8 +642,7 @@ retry: } if (prealloc || extsz) { - error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, - &last_fsb); + error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) return error; } -- cgit v1.2.3 From efc557570dc99b46e46a7be51c3c7402b485e829 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Dec 2008 12:27:36 -0500 Subject: [XFS] avoid memory allocations in xfs_fs_vcmn_err xfs_fs_vcmn_err can be called under a spinlock, but does a sleeping memory allocation to create buffer for it's internal sprintf. Fortunately it's the only caller of icmn_err, so we can merge the two and have one single static buffer and spinlock protecting it. While we're at it make sure we proper __attribute__ format annotations so that the compiler can detect mismatched format strings. Reported-by: Alexander Beregalov Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Lachlan McIlroy --- fs/xfs/support/debug.c | 37 ++++++++++++++++++++++++++++++++----- fs/xfs/support/debug.h | 2 -- fs/xfs/xfs_error.c | 15 --------------- fs/xfs/xfs_error.h | 12 ++++++++---- 4 files changed, 40 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 636104254cf..ae548296542 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -18,6 +18,13 @@ #include #include "debug.h" +/* xfs_mount.h drags a lot of crap in, sorry.. */ +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_ag.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" + static char message[1024]; /* keep it off the stack */ static DEFINE_SPINLOCK(xfs_err_lock); @@ -55,22 +62,42 @@ cmn_err(register int level, char *fmt, ...) } void -icmn_err(register int level, char *fmt, va_list ap) +xfs_fs_vcmn_err( + int level, + struct xfs_mount *mp, + char *fmt, + va_list ap) { - ulong flags; - int len; + unsigned long flags; + int len = 0; level &= XFS_ERR_MASK; - if(level > XFS_MAX_ERR_LEVEL) + if (level > XFS_MAX_ERR_LEVEL) level = XFS_MAX_ERR_LEVEL; + spin_lock_irqsave(&xfs_err_lock,flags); - len = vsnprintf(message, sizeof(message), fmt, ap); + + if (mp) { + len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname); + + /* + * Skip the printk if we can't print anything useful + * due to an over-long device name. + */ + if (len >= sizeof(message)) + goto out; + } + + len = vsnprintf(message + len, sizeof(message) - len, fmt, ap); if (len >= sizeof(message)) len = sizeof(message) - 1; if (message[len-1] == '\n') message[len-1] = 0; + printk("%s%s\n", err_level[level], message); + out: spin_unlock_irqrestore(&xfs_err_lock,flags); + BUG_ON(level == CE_PANIC); } diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index 75845f95081..6f4fd37c67a 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -27,8 +27,6 @@ #define CE_ALERT 1 /* alert */ #define CE_PANIC 0 /* panic */ -extern void icmn_err(int, char *, va_list) - __attribute__ ((format (printf, 2, 0))); extern void cmn_err(int, char *, ...) __attribute__ ((format (printf, 2, 3))); extern void assfail(char *expr, char *f, int l); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index f227ecd1a29..92d5cd5bf4f 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -153,21 +153,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) } #endif /* DEBUG */ -static void -xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) -{ - if (mp != NULL) { - char *newfmt; - int len = 16 + mp->m_fsname_len + strlen(fmt); - - newfmt = kmem_alloc(len, KM_SLEEP); - sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt); - icmn_err(level, newfmt, ap); - kmem_free(newfmt); - } else { - icmn_err(level, fmt, ap); - } -} void xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 11543f10b0c..0c93051c465 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -159,11 +159,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 struct xfs_mount; -/* PRINTFLIKE4 */ + +extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp, + char *fmt, va_list ap) + __attribute__ ((format (printf, 3, 0))); extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, - char *fmt, ...); -/* PRINTFLIKE3 */ -extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); + char *fmt, ...) + __attribute__ ((format (printf, 4, 5))); +extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...) + __attribute__ ((format (printf, 3, 4))); extern void xfs_hex_dump(void *p, int length); -- cgit v1.2.3 From ad1ad968f4e7b06c75741575ea077e25a87da49a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 22 Dec 2008 09:59:06 -0500 Subject: [XFS] handle unaligned data in xfs_bmbt_disk_get_all In libxfs xfs_bmbt_disk_get_all needs to handle unaligned data and thus has been updated to use get_unaligned_be64. In kernelspace we don't strictly need it as the routine is only used for tracing and xfsidbg, but let's keep the two implementations in sync. Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_bmap_btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index e46e02b8e27..8f1ec73725d 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -208,7 +208,8 @@ xfs_bmbt_disk_get_all( xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s) { - __xfs_bmbt_get_all(be64_to_cpu(r->l0), be64_to_cpu(r->l1), s); + __xfs_bmbt_get_all(get_unaligned_be64(&r->l0), + get_unaligned_be64(&r->l1), s); } /* -- cgit v1.2.3 From 136221fc3219b3805c48db5da065e8e3467175d4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 23 Dec 2008 15:21:30 -0500 Subject: nfs: remove redundant tests on reading new pages aops->readpages() and its NFS helper readpage_async_filler() will only be called to do readahead I/O for newly allocated pages. So it's not necessary to test for the always 0 dirty/uptodate page flags. The removal of nfs_wb_page() call also fixes a readahead bug: the NFS readahead has been synchronous since 2.6.23, because that call will clear PG_readahead, which is the reminder for asynchronous readahead. More background: the PG_readahead page flag is shared with PG_reclaim, one for read path and the other for write path. clear_page_dirty_for_io() unconditionally clears PG_readahead to prevent possible readahead residuals, assuming itself to be always called in the write path. However, NFS is one and the only exception in that it _always_ calls clear_page_dirty_for_io() in the read path, i.e. for readpages()/readpage(). Cc: Trond Myklebust Signed-off-by: Wu Fengguang Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 40d17987d0e..f856004bb7f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -533,12 +533,6 @@ readpage_async_filler(void *data, struct page *page) unsigned int len; int error; - error = nfs_wb_page(inode, page); - if (error) - goto out_unlock; - if (PageUptodate(page)) - goto out_unlock; - len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); -- cgit v1.2.3 From d716f0b8a57f8577bcd869e7dcb5a0add9f6fc5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:32 -0500 Subject: SUNRPC: nfsacl_encode/nfsacl_decode should be exported as GPL-only Again, this has never been intended as a public abi for out-of-tree modules. Signed-off-by: Trond Myklebust --- fs/nfs_common/nfsacl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index c11f5375d7c..04133aacb1e 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -29,8 +29,8 @@ MODULE_LICENSE("GPL"); -EXPORT_SYMBOL(nfsacl_encode); -EXPORT_SYMBOL(nfsacl_decode); +EXPORT_SYMBOL_GPL(nfsacl_encode); +EXPORT_SYMBOL_GPL(nfsacl_decode); struct nfsacl_encode_desc { struct xdr_array2_desc desc; -- cgit v1.2.3 From 2de59872a7842143f4507832e7c1f5123c47feb7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:33 -0500 Subject: LOCKD: Make lockd_up() and lockd_down() exported GPL-only Signed-off-by: Trond Myklebust --- fs/lockd/svc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 56b076736b5..252d80163d0 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -45,7 +45,7 @@ static struct svc_program nlmsvc_program; struct nlmsvc_binding * nlmsvc_ops; -EXPORT_SYMBOL(nlmsvc_ops); +EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; @@ -300,7 +300,7 @@ out: mutex_unlock(&nlmsvc_mutex); return error; } -EXPORT_SYMBOL(lockd_up); +EXPORT_SYMBOL_GPL(lockd_up); /* * Decrement the user count and bring down lockd if we're the last. @@ -329,7 +329,7 @@ lockd_down(void) out: mutex_unlock(&nlmsvc_mutex); } -EXPORT_SYMBOL(lockd_down); +EXPORT_SYMBOL_GPL(lockd_down); #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From df94f000c46c055cf439f5b92807cd827557ffbc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 23 Dec 2008 15:21:33 -0500 Subject: lockd: convert reclaimer thread to kthread interface My understanding is that there is a push to turn the kernel_thread interface into a non-exported symbol and move all kernel threads to use the kthread API. This patch changes lockd to use kthread_run to spawn the reclaimer thread. I've made the assumption here that the extra module references taken when we spawn this thread are unnecessary and removed them. I've also added a KERN_ERR printk that pops if the thread can't be spawned to warn the admin that the locks won't be reclaimed. In the future, it would be nice to be able to notify userspace that locks have been lost (probably by implementing SIGLOST), and adding some good policies about how long we should reattempt to reclaim the locks. Finally, I removed a comment about memory leaks that I believe is obsolete and added a new one to clarify the result of sending a SIGKILL to the reclaimer thread. As best I can tell, doing so doesn't actually cause a memory leak. I consider this patch 2.6.29 material. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8307dd64bf4..94d42cc4e39 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -14,6 +14,7 @@ #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_CLIENT @@ -191,11 +192,15 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) void nlmclnt_recovery(struct nlm_host *host) { + struct task_struct *task; + if (!host->h_reclaiming++) { nlm_get_host(host); - __module_get(THIS_MODULE); - if (kernel_thread(reclaimer, host, CLONE_FS | CLONE_FILES) < 0) - module_put(THIS_MODULE); + task = kthread_run(reclaimer, host, "%s-reclaim", host->h_name); + if (IS_ERR(task)) + printk(KERN_ERR "lockd: unable to spawn reclaimer " + "thread. Locks for %s won't be reclaimed! " + "(%ld)\n", host->h_name, PTR_ERR(task)); } } @@ -207,7 +212,6 @@ reclaimer(void *ptr) struct file_lock *fl, *next; u32 nsmstate; - daemonize("%s-reclaim", host->h_name); allow_signal(SIGKILL); down_write(&host->h_rwsem); @@ -233,7 +237,12 @@ restart: list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) { list_del_init(&fl->fl_u.nfs_fl.list); - /* Why are we leaking memory here? --okir */ + /* + * sending this thread a SIGKILL will result in any unreclaimed + * locks being removed from the h_granted list. This means that + * the kernel will not attempt to reclaim them again if a new + * reclaimer thread is spawned for this host. + */ if (signalled()) continue; if (nlmclnt_reclaim(host, fl) != 0) @@ -261,5 +270,5 @@ restart: nlm_release_host(host); lockd_down(); unlock_kernel(); - module_put_and_exit(0); + return 0; } -- cgit v1.2.3 From 7b5d2b98e118716dd1ccc2808fae88f6c4b16d54 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:34 -0500 Subject: NFS: rename nfs_path variable Clean up: I'm about to move the declaration of nfs_mount into fs/nfs/internal.h and include it in fs/nfs/nfsroot.c. There's a conflicting definition of nfs_path in fs/nfs/internal.h and fs/nfs/nfsroot.c, so rename the private one. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8478fc25dae..f1c0a066439 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -100,7 +100,7 @@ static char nfs_root_name[256] __initdata = ""; static __be32 servaddr __initdata = 0; /* Name of directory to mount */ -static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, }; +static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, }; /* NFS-related data */ static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ @@ -312,7 +312,7 @@ static int __init root_nfs_name(char *name) printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); return -1; } - sprintf(nfs_path, buf, cp); + sprintf(nfs_export_path, buf, cp); return 1; } @@ -340,7 +340,7 @@ static int __init root_nfs_addr(void) static void __init root_nfs_print(void) { printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", - nfs_path, nfs_data.hostname); + nfs_export_path, nfs_data.hostname); printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", @@ -493,10 +493,10 @@ static int __init root_nfs_get_handle(void) set_sockaddr(&sin, servaddr, htons(mount_port)); status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, - nfs_path, version, protocol, &fh); + nfs_export_path, version, protocol, &fh); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " - "while mounting %s\n", status, nfs_path); + "while mounting %s\n", status, nfs_export_path); else { nfs_data.root.size = fh.size; memcpy(nfs_data.root.data, fh.data, fh.size); -- cgit v1.2.3 From 146ec944bbd31d241a44a00518b054fb01921d22 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:34 -0500 Subject: NFS: Move declaration of nfs_mount() to fs/nfs/internal.h Clean up: The nfs_mount() function is not to be used outside of the NFS client. Move its public declaration to fs/nfs/internal.h. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ++++ fs/nfs/nfsroot.c | 2 ++ 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d212ee41caf..7a38cc7b413 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -63,6 +63,10 @@ struct nfs_parsed_mount_data { struct security_mnt_opts lsm_opts; }; +/* mount_clnt.c */ +extern int nfs_mount(struct sockaddr *, size_t, char *, char *, + int, int, struct nfs_fh *); + /* client.c */ extern struct rpc_program nfs_program; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index f1c0a066439..a96e5fdb38a 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -86,6 +86,8 @@ #include #include +#include "internal.h" + /* Define this to allow debugging output */ #undef NFSROOT_DEBUG #define NFSDBG_FACILITY NFSDBG_ROOT -- cgit v1.2.3 From c5d120f8e8b464368a7dcb038dc5c077d234d10a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:35 -0500 Subject: NFS: introduce nfs_mount_info struct for calling nfs_mount() Clean up: convert nfs_mount() to take a single data structure argument to make it simpler to add more arguments. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 13 +++++++++++-- fs/nfs/mount_clnt.c | 30 ++++++++++++------------------ fs/nfs/nfsroot.c | 17 +++++++++++------ fs/nfs/super.c | 29 +++++++++++++++-------------- 4 files changed, 49 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7a38cc7b413..4e983961346 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -64,8 +64,17 @@ struct nfs_parsed_mount_data { }; /* mount_clnt.c */ -extern int nfs_mount(struct sockaddr *, size_t, char *, char *, - int, int, struct nfs_fh *); +struct nfs_mount_request { + struct sockaddr *sap; + size_t salen; + char *hostname; + char *dirpath; + u32 version; + unsigned short protocol; + struct nfs_fh *fh; +}; + +extern int nfs_mount(struct nfs_mount_request *info); /* client.c */ extern struct rpc_program nfs_program; diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 086a6830d78..7e37113d37e 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -29,33 +29,26 @@ struct mnt_fhstatus { /** * nfs_mount - Obtain an NFS file handle for the given host and path - * @addr: pointer to server's address - * @len: size of server's address - * @hostname: name of server host, or NULL - * @path: pointer to string containing export path to mount - * @version: mount version to use for this request - * @protocol: transport protocol to use for thie request - * @fh: pointer to location to place returned file handle + * @info: pointer to mount request arguments * * Uses default timeout parameters specified by underlying transport. */ -int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, - int version, int protocol, struct nfs_fh *fh) +int nfs_mount(struct nfs_mount_request *info) { struct mnt_fhstatus result = { - .fh = fh + .fh = info->fh }; struct rpc_message msg = { - .rpc_argp = path, + .rpc_argp = info->dirpath, .rpc_resp = &result, }; struct rpc_create_args args = { - .protocol = protocol, - .address = addr, - .addrsize = len, - .servername = hostname, + .protocol = info->protocol, + .address = info->sap, + .addrsize = info->salen, + .servername = info->hostname, .program = &mnt_program, - .version = version, + .version = info->version, .authflavor = RPC_AUTH_UNIX, .flags = 0, }; @@ -63,13 +56,14 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, int status; dprintk("NFS: sending MNT request for %s:%s\n", - (hostname ? hostname : "server"), path); + (info->hostname ? info->hostname : "server"), + info->dirpath); mnt_clnt = rpc_create(&args); if (IS_ERR(mnt_clnt)) goto out_clnt_err; - if (version == NFS_MNT3_VERSION) + if (info->version == NFS_MNT3_VERSION) msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; else msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index a96e5fdb38a..f015e0d62ad 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -487,15 +487,20 @@ static int __init root_nfs_get_handle(void) { struct nfs_fh fh; struct sockaddr_in sin; + struct nfs_mount_request request = { + .sap = (struct sockaddr *)&sin, + .salen = sizeof(sin), + .dirpath = nfs_export_path, + .version = (nfs_data.flags & NFS_MOUNT_VER3) ? + NFS_MNT3_VERSION : NFS_MNT_VERSION, + .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? + XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, + .fh = &fh, + }; int status; - int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? - XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP; - int version = (nfs_data.flags & NFS_MOUNT_VER3) ? - NFS_MNT3_VERSION : NFS_MNT_VERSION; set_sockaddr(&sin, servaddr, htons(mount_port)); - status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, - nfs_export_path, version, protocol, &fh); + status = nfs_mount(&request); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " "while mounting %s\n", status, nfs_export_path); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f48db679a1c..2b0c8e132b5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1329,8 +1329,13 @@ out_security_failure: static int nfs_try_mount(struct nfs_parsed_mount_data *args, struct nfs_fh *root_fh) { - struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address; - char *hostname; + struct nfs_mount_request request = { + .sap = (struct sockaddr *) + &args->mount_server.address, + .dirpath = args->nfs_server.export_path, + .protocol = args->mount_server.protocol, + .fh = root_fh, + }; int status; if (args->mount_server.version == 0) { @@ -1339,42 +1344,38 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, else args->mount_server.version = NFS_MNT_VERSION; } + request.version = args->mount_server.version; if (args->mount_server.hostname) - hostname = args->mount_server.hostname; + request.hostname = args->mount_server.hostname; else - hostname = args->nfs_server.hostname; + request.hostname = args->nfs_server.hostname; /* * Construct the mount server's address. */ if (args->mount_server.address.ss_family == AF_UNSPEC) { - memcpy(sap, &args->nfs_server.address, + memcpy(request.sap, &args->nfs_server.address, args->nfs_server.addrlen); args->mount_server.addrlen = args->nfs_server.addrlen; } + request.salen = args->mount_server.addrlen; /* * autobind will be used if mount_server.port == 0 */ - nfs_set_port(sap, args->mount_server.port); + nfs_set_port(request.sap, args->mount_server.port); /* * Now ask the mount server to map our export path * to a file handle. */ - status = nfs_mount(sap, - args->mount_server.addrlen, - hostname, - args->nfs_server.export_path, - args->mount_server.version, - args->mount_server.protocol, - root_fh); + status = nfs_mount(&request); if (status == 0) return 0; dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", - hostname, status); + request.hostname, status); return status; } -- cgit v1.2.3 From 4a01b8a4ee7b12becd26a49bae57f019605658cd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:35 -0500 Subject: NFS: expand flags passed to nfs_create_rpc_client() The nfs_create_rpc_client() function sets up an RPC client for an NFS mount point. Add an option that allows it to set up an RPC transport from an unprivileged port. Instead of having nfs_create_rpc_client()'s callers retain local knowledge about how to set up an RPC client, create a couple of flag arguments to control the use of RPC_CLNT_CREATE flags. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7547600b617..d11cdaafb39 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -470,7 +470,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, static int nfs_create_rpc_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, rpc_authflavor_t flavor, - int flags) + int discrtry, int noresvport) { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { @@ -482,9 +482,13 @@ static int nfs_create_rpc_client(struct nfs_client *clp, .program = &nfs_program, .version = clp->rpc_ops->version, .authflavor = flavor, - .flags = flags, }; + if (discrtry) + args.flags |= RPC_CLNT_CREATE_DISCRTRY; + if (noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + if (!IS_ERR(clp->cl_rpcclient)) return 0; @@ -623,7 +627,7 @@ static int nfs_init_client(struct nfs_client *clp, * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ - error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0); + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0, 0); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -979,7 +983,7 @@ static int nfs4_init_client(struct nfs_client *clp, clp->rpc_ops = &nfs_v4_clientops; error = nfs_create_rpc_client(clp, timeparms, authflavour, - RPC_CLNT_CREATE_DISCRTRY); + 1, 0); if (error < 0) goto error; memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); -- cgit v1.2.3 From 542fcc334adfea36d407cbf698d549fcb2bf6b91 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:36 -0500 Subject: NFS: move nfs_server flag initialization Make it possible for the NFSv4 mount set up logic to pass mount option flags down the stack to nfs_create_rpc_client(). This is immediately useful if we want NFS mount options to modulate settings of the underlying RPC transport, but it may be useful at some later point if other parts of the NFSv4 mount initialization logic want to know what the mount options are. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d11cdaafb39..27190337fc1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1063,6 +1063,10 @@ static int nfs4_init_server(struct nfs_server *server, nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, data->timeo, data->retrans); + /* Initialise the client representation from the mount data */ + server->flags = data->flags; + server->caps |= NFS_CAP_ATOMIC_OPEN; + /* Get a client record */ error = nfs4_set_client(server, data->nfs_server.hostname, @@ -1075,10 +1079,6 @@ static int nfs4_init_server(struct nfs_server *server, if (error < 0) goto error; - /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN; - if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); if (data->wsize) @@ -1181,6 +1181,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, parent_server = NFS_SB(data->sb); parent_client = parent_server->nfs_client; + /* Initialise the client representation from the parent server */ + nfs_server_copy_userdata(server, parent_server); + server->caps |= NFS_CAP_ATOMIC_OPEN; + /* Get a client representation. * Note: NFSv4 always uses TCP, */ error = nfs4_set_client(server, data->hostname, @@ -1193,10 +1197,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - /* Initialise the client representation from the parent server */ - nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN; - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); if (error < 0) goto error; -- cgit v1.2.3 From d740351bf0960e89ce1aef45cfe00167cb0f9e5b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:37 -0500 Subject: NFS: add "[no]resvport" mount option The standard default security setting for NFS is AUTH_SYS. An NFS client connects to NFS servers via a privileged source port and a fixed standard destination port (2049). The client sends raw uid and gid numbers to identify users making NFS requests, and the server assumes an appropriate authority on the client has vetted these values because the source port is privileged. On Linux, by default in-kernel RPC services use a privileged port in the range between 650 and 1023 to avoid using source ports of well- known IP services. Using such a small range limits the number of NFS mount points and the number of unique NFS servers to which a client can connect concurrently. An NFS client can use unprivileged source ports to expand the range of source port numbers, allowing more concurrent server connections and more NFS mount points. Servers must explicitly allow NFS connections from unprivileged ports for this to work. In the past, bumping the value of the sunrpc.max_resvport sysctl on the client would permit the NFS client to use unprivileged ports. Bumping this setting also changes the maximum port number used by other in-kernel RPC services, some of which still required a port number less than 1023. This is exacerbated by the way source port numbers are chosen by the Linux RPC client, which starts at the top of the range and works downwards. It means that bumping the maximum means all RPC services requesting a source port will likely get an unprivileged port instead of a privileged one. Changing this setting effects all NFS mount points on a client. A sysadmin could not selectively choose which mount points would use non-privileged ports and which could not. Lastly, this mechanism of expanding the limit on the number of NFS mount points was entirely undocumented. To address the need for the NFS client to use a large range of source ports without interfering with the activity of other in-kernel RPC services, we introduce a new NFS mount option. This option explicitly tells only the NFS client to use a non-privileged source port when communicating with the NFS server for one specific mount point. This new mount option is called "resvport," like the similar NFS mount option on FreeBSD and Mac OS X. A sister patch for nfs-utils will be submitted that documents this new option in nfs(5). The default setting for this new mount option requires the NFS client to use a privileged port, as before. Explicitly specifying the "noresvport" mount option allows the NFS client to use an unprivileged source port for this mount point when connecting to the NFS server port. This mount option is supported only for text-based NFS mounts. [ Sidebar: it is widely known that security mechanisms based on the use of privileged source ports are ineffective. However, the NFS client can combine the use of unprivileged ports with the use of secure authentication mechanisms, such as Kerberos. This allows a large number of connections and mount points while ensuring a useful level of security. Eventually we may change the default setting for this option depending on the security flavor used for the mount. For example, if the mount is using only AUTH_SYS, then the default setting will be "resvport;" if the mount is using a strong security flavor such as krb5, the default setting will be "noresvport." ] Signed-off-by: Chuck Lever [Trond.Myklebust@netapp.com: Fixed a bug whereby nfs4_init_client() was being called with incorrect arguments.] Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 11 +++++++---- fs/nfs/super.c | 12 +++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 27190337fc1..3a69cacc4fa 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -627,7 +627,8 @@ static int nfs_init_client(struct nfs_client *clp, * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ - error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0, 0); + error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, + 0, data->flags & NFS_MOUNT_NORESVPORT); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -969,7 +970,8 @@ error: static int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, - rpc_authflavor_t authflavour) + rpc_authflavor_t authflavour, + int flags) { int error; @@ -983,7 +985,7 @@ static int nfs4_init_client(struct nfs_client *clp, clp->rpc_ops = &nfs_v4_clientops; error = nfs_create_rpc_client(clp, timeparms, authflavour, - 1, 0); + 1, flags & NFS_MOUNT_NORESVPORT); if (error < 0) goto error; memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); @@ -1034,7 +1036,8 @@ static int nfs4_set_client(struct nfs_server *server, error = PTR_ERR(clp); goto error; } - error = nfs4_init_client(clp, timeparms, ip_addr, authflavour); + error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, + server->flags); if (error < 0) goto error_put; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2b0c8e132b5..e05a77be306 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -75,6 +75,7 @@ enum { Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, Opt_sharecache, Opt_nosharecache, + Opt_resvport, Opt_noresvport, /* Mount options that take integer arguments */ Opt_port, @@ -129,6 +130,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_nordirplus, "nordirplus" }, { Opt_sharecache, "sharecache" }, { Opt_nosharecache, "nosharecache" }, + { Opt_resvport, "resvport" }, + { Opt_noresvport, "noresvport" }, { Opt_port, "port=%u" }, { Opt_rsize, "rsize=%u" }, @@ -514,7 +517,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, { NFS_MOUNT_NONLM, ",nolock", "" }, { NFS_MOUNT_NOACL, ",noacl", "" }, { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, - { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, + { NFS_MOUNT_UNSHARED, ",nosharecache", "" }, + { NFS_MOUNT_NORESVPORT, ",noresvport", "" }, { 0, NULL, NULL } }; const struct proc_nfs_info *nfs_infop; @@ -1035,6 +1039,12 @@ static int nfs_parse_mount_options(char *raw, case Opt_nosharecache: mnt->flags |= NFS_MOUNT_UNSHARED; break; + case Opt_resvport: + mnt->flags &= ~NFS_MOUNT_NORESVPORT; + break; + case Opt_noresvport: + mnt->flags |= NFS_MOUNT_NORESVPORT; + break; /* * options that take numeric values -- cgit v1.2.3 From 50a737f86dbf99daf3a8dcbdf778a3be36bb2a39 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:37 -0500 Subject: NFS: "[no]resvport" mount option changes mountd client too If the admin has specified the "noresvport" option for an NFS mount point, the kernel's NFS client uses an unprivileged source port for the main NFS transport. The kernel's mountd client should use an unprivileged port in this case as well. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/mount_clnt.c | 4 +++- fs/nfs/super.c | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4e983961346..340ede8f608 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -72,6 +72,7 @@ struct nfs_mount_request { u32 version; unsigned short protocol; struct nfs_fh *fh; + int noresvport; }; extern int nfs_mount(struct nfs_mount_request *info); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 7e37113d37e..ca905a5bb1b 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -50,7 +50,6 @@ int nfs_mount(struct nfs_mount_request *info) .program = &mnt_program, .version = info->version, .authflavor = RPC_AUTH_UNIX, - .flags = 0, }; struct rpc_clnt *mnt_clnt; int status; @@ -59,6 +58,9 @@ int nfs_mount(struct nfs_mount_request *info) (info->hostname ? info->hostname : "server"), info->dirpath); + if (info->noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + mnt_clnt = rpc_create(&args); if (IS_ERR(mnt_clnt)) goto out_clnt_err; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e05a77be306..d8e062fe76b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1345,6 +1345,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .dirpath = args->nfs_server.export_path, .protocol = args->mount_server.protocol, .fh = root_fh, + .noresvport = args->flags & NFS_MOUNT_NORESVPORT, }; int status; -- cgit v1.2.3 From 0cb2659b818eca99235e17c04291cfa9985c14f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Dec 2008 15:21:38 -0500 Subject: NLM: allow lockd requests from an unprivileged port If the admin has specified the "noresvport" option for an NFS mount point, the kernel's NFS client uses an unprivileged source port for the main NFS transport. The kernel's lockd client should use an unprivileged port in this case as well. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 10 +++++++++- fs/nfs/client.c | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 94d42cc4e39..1f3b0fc0d35 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -61,7 +61,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, nlm_init->protocol, nlm_version, - nlm_init->hostname); + nlm_init->hostname, nlm_init->noresvport); if (host == NULL) { lockd_down(); return ERR_PTR(-ENOLCK); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 70fc63a1727..acc2aa5021d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -48,6 +48,7 @@ struct nlm_lookup_host_info { const size_t hostname_len; /* it's length */ const struct sockaddr *src_sap; /* our address (optional) */ const size_t src_len; /* it's length */ + const int noresvport; /* use non-priv port */ }; /* @@ -222,6 +223,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) host->h_nsmstate = 0; /* real NSM state */ host->h_nsmhandle = nsm; host->h_server = ni->server; + host->h_noresvport = ni->noresvport; hlist_add_head(&host->h_hash, chain); INIT_LIST_HEAD(&host->h_lockowners); spin_lock_init(&host->h_lock); @@ -272,6 +274,7 @@ nlm_destroy_host(struct nlm_host *host) * @protocol: transport protocol to use * @version: NLM protocol version * @hostname: '\0'-terminated hostname of server + * @noresvport: 1 if non-privileged port should be used * * Returns an nlm_host structure that matches the passed-in * [server address, transport protocol, NLM version, server hostname]. @@ -281,7 +284,9 @@ nlm_destroy_host(struct nlm_host *host) struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const size_t salen, const unsigned short protocol, - const u32 version, const char *hostname) + const u32 version, + const char *hostname, + int noresvport) { const struct sockaddr source = { .sa_family = AF_UNSPEC, @@ -296,6 +301,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .hostname_len = strlen(hostname), .src_sap = &source, .src_len = sizeof(source), + .noresvport = noresvport, }; dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, @@ -417,6 +423,8 @@ nlm_bind_host(struct nlm_host *host) */ if (!host->h_server) args.flags |= RPC_CLNT_CREATE_HARDRTRY; + if (host->h_noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; clnt = rpc_create(&args); if (!IS_ERR(clnt)) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3a69cacc4fa..70b6d9e8517 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -526,6 +526,8 @@ static int nfs_start_lockd(struct nfs_server *server) .protocol = server->flags & NFS_MOUNT_TCP ? IPPROTO_TCP : IPPROTO_UDP, .nfs_version = clp->rpc_ops->version, + .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? + 1 : 0, }; if (nlm_init.nfs_version > 3) -- cgit v1.2.3 From 343104308a33c4f1e23c8e841ede95e97b870842 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:38 -0500 Subject: NFSv4: Fix up another delegation related race When we can update_open_stateid(), we need to be certain that we don't race with a delegation return. While we could do this by grabbing the nfs_client->cl_lock, a dedicated spin lock in the delegation structure will scale better. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 7 ++++- fs/nfs/delegation.h | 1 + fs/nfs/nfs4proc.c | 80 +++++++++++++++++++++++++++++++++++------------------ 3 files changed, 60 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index cc563cfa694..e0cb4ee3b23 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -140,13 +140,17 @@ static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfs if (delegation == NULL) goto nomatch; + spin_lock(&delegation->lock); if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, sizeof(delegation->stateid.data)) != 0) - goto nomatch; + goto nomatch_unlock; list_del_rcu(&delegation->super_list); nfsi->delegation_state = 0; rcu_assign_pointer(nfsi->delegation, NULL); + spin_unlock(&delegation->lock); return delegation; +nomatch_unlock: + spin_unlock(&delegation->lock); nomatch: return NULL; } @@ -172,6 +176,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation->change_attr = nfsi->change_attr; delegation->cred = get_rpccred(cred); delegation->inode = inode; + spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); if (rcu_dereference(nfsi->delegation) != NULL) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index f1c5e2a5d88..8299c6220e9 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -22,6 +22,7 @@ struct nfs_delegation { long flags; loff_t maxsize; __u64 change_attr; + spinlock_t lock; struct rcu_head rcu; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 83e700a2b0c..254cbff103f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -388,9 +388,8 @@ static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid write_sequnlock(&state->seqlock); } -static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) +static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, int open_flags) { - open_flags &= (FMODE_READ|FMODE_WRITE); /* * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update @@ -408,6 +407,45 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_sta spin_unlock(&state->owner->so_lock); } +static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, int open_flags) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_delegation *deleg_cur; + int ret = 0; + + open_flags &= (FMODE_READ|FMODE_WRITE); + + rcu_read_lock(); + deleg_cur = rcu_dereference(nfsi->delegation); + if (deleg_cur == NULL) + goto no_delegation; + + spin_lock(&deleg_cur->lock); + if (nfsi->delegation != deleg_cur || + (deleg_cur->type & open_flags) != open_flags) + goto no_delegation_unlock; + + if (delegation == NULL) + delegation = &deleg_cur->stateid; + else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) + goto no_delegation_unlock; + + __update_open_stateid(state, open_stateid, &deleg_cur->stateid, open_flags); + ret = 1; +no_delegation_unlock: + spin_unlock(&deleg_cur->lock); +no_delegation: + rcu_read_unlock(); + + if (!ret && open_stateid != NULL) { + __update_open_stateid(state, open_stateid, NULL, open_flags); + ret = 1; + } + + return ret; +} + + static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags) { struct nfs_delegation *delegation; @@ -431,23 +469,23 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) nfs4_stateid stateid; int ret = -EAGAIN; - rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); for (;;) { if (can_open_cached(state, open_mode)) { spin_lock(&state->owner->so_lock); if (can_open_cached(state, open_mode)) { update_open_stateflags(state, open_mode); spin_unlock(&state->owner->so_lock); - rcu_read_unlock(); goto out_return_state; } spin_unlock(&state->owner->so_lock); } - if (delegation == NULL) - break; - if (!can_open_delegated(delegation, open_mode)) + rcu_read_lock(); + delegation = rcu_dereference(nfsi->delegation); + if (delegation == NULL || + !can_open_delegated(delegation, open_mode)) { + rcu_read_unlock(); break; + } /* Save the delegation */ memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); @@ -455,19 +493,11 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) if (ret != 0) goto out; ret = -EAGAIN; - rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); - /* If no delegation, try a cached open */ - if (delegation == NULL) - continue; - /* Is the delegation still valid? */ - if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0) - continue; - rcu_read_unlock(); - update_open_stateid(state, NULL, &stateid, open_mode); - goto out_return_state; + + /* Try to update the stateid using the delegation */ + if (update_open_stateid(state, NULL, &stateid, open_mode)) + goto out_return_state; } - rcu_read_unlock(); out: return ERR_PTR(ret); out_return_state: @@ -480,7 +510,6 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data struct inode *inode; struct nfs4_state *state = NULL; struct nfs_delegation *delegation; - nfs4_stateid *deleg_stateid = NULL; int ret; if (!data->rpc_done) { @@ -516,12 +545,9 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data data->owner->so_cred, &data->o_res); } - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL) - deleg_stateid = &delegation->stateid; - update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags); - rcu_read_unlock(); + + update_open_stateid(state, &data->o_res.stateid, NULL, + data->o_arg.open_flags); iput(inode); out: return state; -- cgit v1.2.3 From 86e894899820f2b3094d5557124fc22743ae0fc7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:39 -0500 Subject: NFSv4: Fix up the dereferencing of delegation->inode Without an extra lock, we cannot just assume that the delegation->inode is valid when we're traversing the rcu-protected nfs_client lists. Use the delegation->lock to ensure that it is truly valid. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index e0cb4ee3b23..13f2044a30b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -134,6 +134,17 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * return res; } +static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation) +{ + struct inode *inode = NULL; + + spin_lock(&delegation->lock); + if (delegation->inode != NULL) + inode = igrab(delegation->inode); + spin_unlock(&delegation->lock); + return inode; +} + static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) { struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); @@ -145,6 +156,7 @@ static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfs sizeof(delegation->stateid.data)) != 0) goto nomatch_unlock; list_del_rcu(&delegation->super_list); + delegation->inode = NULL; nfsi->delegation_state = 0; rcu_assign_pointer(nfsi->delegation, NULL); spin_unlock(&delegation->lock); @@ -298,9 +310,11 @@ void nfs_return_all_delegations(struct super_block *sb) restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - if (delegation->inode->i_sb != sb) - continue; - inode = igrab(delegation->inode); + inode = NULL; + spin_lock(&delegation->lock); + if (delegation->inode != NULL && delegation->inode->i_sb == sb) + inode = igrab(delegation->inode); + spin_unlock(&delegation->lock); if (inode == NULL) continue; spin_lock(&clp->cl_lock); @@ -329,7 +343,7 @@ restart: goto out; rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - inode = igrab(delegation->inode); + inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) continue; spin_lock(&clp->cl_lock); @@ -376,7 +390,7 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - inode = igrab(delegation->inode); + inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) continue; spin_lock(&clp->cl_lock); @@ -464,10 +478,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs struct inode *res = NULL; rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { + spin_lock(&delegation->lock); + if (delegation->inode != NULL && + nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { res = igrab(delegation->inode); - break; } + spin_unlock(&delegation->lock); + if (res != NULL) + break; } rcu_read_unlock(); return res; @@ -491,17 +509,22 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp) void nfs_delegation_reap_unclaimed(struct nfs_client *clp) { struct nfs_delegation *delegation; + struct inode *inode; restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); rcu_read_unlock(); if (delegation != NULL) nfs_free_delegation(delegation); + iput(inode); goto restart; } rcu_read_unlock(); -- cgit v1.2.3 From 15c831bf1a3f8cab9812a96228145200726fea33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:39 -0500 Subject: NFS: Use atomic bitops when changing struct nfs_delegation->flags Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 6 +++--- fs/nfs/delegation.h | 4 ++-- fs/nfs/nfs4proc.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 13f2044a30b..646ba3e75a1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -119,7 +119,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st delegation->maxsize = res->maxsize; oldcred = delegation->cred; delegation->cred = get_rpccred(cred); - delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; + clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); NFS_I(inode)->delegation_state = delegation->type; smp_wmb(); put_rpccred(oldcred); @@ -499,7 +499,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp) struct nfs_delegation *delegation; rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) - delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; + set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); rcu_read_unlock(); } @@ -513,7 +513,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp) restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 8299c6220e9..5e9f40e0a7d 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -18,10 +18,10 @@ struct nfs_delegation { struct inode *inode; nfs4_stateid stateid; int type; -#define NFS_DELEGATION_NEED_RECLAIM 1 - long flags; loff_t maxsize; __u64 change_attr; +#define NFS_DELEGATION_NEED_RECLAIM 0 + unsigned long flags; spinlock_t lock; struct rcu_head rcu; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 254cbff103f..d53aa2dace8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -344,7 +344,7 @@ static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_fla { if ((delegation->type & open_flags) != open_flags) return 0; - if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) + if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) return 0; return 1; } @@ -536,7 +536,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (delegation) delegation_flags = delegation->flags; rcu_read_unlock(); - if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) + if ((delegation_flags & 1UL<inode, data->owner->so_cred, &data->o_res); @@ -667,7 +667,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state opendata->o_arg.fh = NFS_FH(state->inode); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) delegation_type = delegation->type; rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; @@ -839,7 +839,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_lock(); delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); if (delegation != NULL && - (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { + test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) { rcu_read_unlock(); goto out_no_action; } -- cgit v1.2.3 From 028600143079c8e8f8366bbc2eb29977743baf3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:40 -0500 Subject: NFSv4: Clean up for the state loss reclaimer Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 130 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 81 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 401ef8b28f9..99182b3229e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -811,7 +811,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) nfs4_recover_state(clp); } -static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) +static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { struct inode *inode = state->inode; struct file_lock *fl; @@ -844,7 +844,7 @@ out_err: return status; } -static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp) +static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops) { struct nfs4_state *state; struct nfs4_lock_state *lock; @@ -863,15 +863,15 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n continue; status = ops->recover_open(sp, state); if (status >= 0) { - status = nfs4_reclaim_locks(ops, state); - if (status < 0) - goto out_err; - list_for_each_entry(lock, &state->lock_states, ls_locks) { - if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) - printk("%s: Lock reclaim failed!\n", + status = nfs4_reclaim_locks(state, ops); + if (status >= 0) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) + printk("%s: Lock reclaim failed!\n", __func__); + } + continue; } - continue; } switch (status) { default: @@ -928,45 +928,45 @@ static void nfs4_state_mark_reclaim(struct nfs_client *clp) } } -static int reclaimer(void *ptr) +static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) { - struct nfs_client *clp = ptr; - struct nfs4_state_owner *sp; struct rb_node *pos; - struct nfs4_state_recovery_ops *ops; - struct rpc_cred *cred; int status = 0; - allow_signal(SIGKILL); + /* Note: list is protected by exclusive lock on cl->cl_sem */ + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + status = nfs4_reclaim_open_state(sp, ops); + if (status < 0) + break; + } + return status; +} + +static int nfs4_check_lease(struct nfs_client *clp) +{ + struct rpc_cred *cred; + int status = -NFS4ERR_EXPIRED; - /* Ensure exclusive access to NFSv4 state */ - down_write(&clp->cl_sem); - /* Are there any NFS mounts out there? */ - if (list_empty(&clp->cl_superblocks)) - goto out; -restart_loop: - ops = &nfs4_network_partition_recovery_ops; /* Are there any open files on this volume? */ cred = nfs4_get_renew_cred(clp); if (cred != NULL) { /* Yes there are: try to renew the old lease */ status = nfs4_proc_renew(clp, cred); put_rpccred(cred); - switch (status) { - case 0: - case -NFS4ERR_CB_PATH_DOWN: - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_LEASE_MOVED: - ops = &nfs4_reboot_recovery_ops; - } - } else { - /* "reboot" to ensure we clear all state on the server */ - clp->cl_boot_time = CURRENT_TIME; + return status; } - /* We're going to have to re-establish a clientid */ - nfs4_state_mark_reclaim(clp); - status = -ENOENT; + + /* "reboot" to ensure we clear all state on the server */ + clp->cl_boot_time = CURRENT_TIME; + return status; +} + +static int nfs4_reclaim_lease(struct nfs_client *clp) +{ + struct rpc_cred *cred; + int status = -ENOENT; + cred = nfs4_get_setclientid_cred(clp); if (cred != NULL) { status = nfs4_init_client(clp, cred); @@ -974,29 +974,61 @@ restart_loop: /* Handle case where the user hasn't set up machine creds */ if (status == -EACCES && cred == clp->cl_machine_cred) { nfs4_clear_machine_cred(clp); - goto restart_loop; + status = -EAGAIN; } } - if (status) - goto out_error; - /* Mark all delegations for reclaim */ - nfs_delegation_mark_reclaim(clp); - /* Note: list is protected by exclusive lock on cl->cl_sem */ - for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { - sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); - status = nfs4_reclaim_open_state(ops, sp); + return status; +} + +static int reclaimer(void *ptr) +{ + struct nfs_client *clp = ptr; + const struct nfs4_state_recovery_ops *ops; + int status = 0; + + allow_signal(SIGKILL); + + /* Ensure exclusive access to NFSv4 state */ + down_write(&clp->cl_sem); + while (!list_empty(&clp->cl_superblocks)) { + ops = &nfs4_network_partition_recovery_ops; + status = nfs4_check_lease(clp); + switch (status) { + case 0: + case -NFS4ERR_CB_PATH_DOWN: + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + ops = &nfs4_reboot_recovery_ops; + } + + /* We're going to have to re-establish a clientid */ + nfs4_state_mark_reclaim(clp); + + status = nfs4_reclaim_lease(clp); + if (status) { + if (status == -EAGAIN) + continue; + goto out_error; + } + + /* Mark all delegations for reclaim */ + nfs_delegation_mark_reclaim(clp); + /* Note: list is protected by exclusive lock on cl->cl_sem */ + status = nfs4_do_reclaim(clp, ops); if (status < 0) { if (status == -NFS4ERR_NO_GRACE) { ops = &nfs4_network_partition_recovery_ops; - status = nfs4_reclaim_open_state(ops, sp); + status = nfs4_do_reclaim(clp, ops); } if (status == -NFS4ERR_STALE_CLIENTID) - goto restart_loop; + continue; if (status == -NFS4ERR_EXPIRED) - goto restart_loop; + continue; } + nfs_delegation_reap_unclaimed(clp); + break; } - nfs_delegation_reap_unclaimed(clp); out: up_write(&clp->cl_sem); if (status == -NFS4ERR_CB_PATH_DOWN) -- cgit v1.2.3 From 6dc9d57af9917f5c7faa13c17b770dce17c3972b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:41 -0500 Subject: NFSv4: Callers to nfs4_get_renew_cred() need to hold nfs_client->cl_lock Ditto for nfs4_get_setclientid_cred(). Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4renewd.c | 2 +- fs/nfs/nfs4state.c | 20 +++++++++++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ea790645fda..0ac6bbb8eaa 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -202,7 +202,7 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ -struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); +struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 3305acbbe2a..9fe8640a88e 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -77,7 +77,7 @@ nfs4_renew_state(struct work_struct *work) timeout = (2 * lease) / 3 + (long)last - (long)now; /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { - cred = nfs4_get_renew_cred(clp); + cred = nfs4_get_renew_cred_locked(clp); if (cred == NULL) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); spin_unlock(&clp->cl_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 99182b3229e..300faba9a18 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -71,14 +71,12 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } -static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp) +static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; - spin_lock(&clp->cl_lock); if (clp->cl_machine_cred != NULL) cred = get_rpccred(clp->cl_machine_cred); - spin_unlock(&clp->cl_lock); return cred; } @@ -94,7 +92,7 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp) put_rpccred(cred); } -struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) +struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rb_node *pos; @@ -110,13 +108,24 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) return cred; } +static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) +{ + struct rpc_cred *cred; + + spin_lock(&clp->cl_lock); + cred = nfs4_get_renew_cred_locked(clp); + spin_unlock(&clp->cl_lock); + return cred; +} + static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rb_node *pos; struct rpc_cred *cred; - cred = nfs4_get_machine_cred(clp); + spin_lock(&clp->cl_lock); + cred = nfs4_get_machine_cred_locked(clp); if (cred != NULL) goto out; pos = rb_first(&clp->cl_state_owners); @@ -125,6 +134,7 @@ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) cred = get_rpccred(sp->so_cred); } out: + spin_unlock(&clp->cl_lock); return cred; } -- cgit v1.2.3 From b79a4a1b45b2543e38026303a1956bdc0aababa0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:41 -0500 Subject: NFSv4: Fix state recovery when the client runs over the grace period If the client for some reason is not able to recover all its state within the time allotted for the grace period, and the server reboots again, the client is not allowed to recover the state that was 'lost' using reboot recovery. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 8 ++- fs/nfs/nfs4proc.c | 7 +- fs/nfs/nfs4state.c | 189 ++++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 155 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0ac6bbb8eaa..1c6fbd1cda9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -40,6 +40,9 @@ struct idmap; enum nfs4_client_state { NFS4CLNT_STATE_RECOVER = 0, NFS4CLNT_LEASE_EXPIRED, + NFS4CLNT_RECLAIM_REBOOT, + NFS4CLNT_RECLAIM_NOGRACE, + NFS4CLNT_CB_PATH_DOWN, }; /* @@ -128,6 +131,8 @@ enum { NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ + NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */ + NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */ }; struct nfs4_state { @@ -160,6 +165,7 @@ struct nfs4_exception { }; struct nfs4_state_recovery_ops { + int state_flag_bit; int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); }; @@ -187,7 +193,7 @@ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; -extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops; extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d53aa2dace8..279ab36b5a6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -984,7 +984,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) ret = nfs4_wait_clnt_recover(server->client, clp); if (ret != 0) return ret; - if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) break; nfs4_schedule_state_recovery(clp); } @@ -2942,7 +2942,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre spin_lock(&clp->cl_lock); clp->cl_lease_time = fsinfo.lease_time * HZ; clp->cl_last_renewal = now; - clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); spin_unlock(&clp->cl_lock); } return status; @@ -3690,11 +3689,13 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, } struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { + .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, }; -struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { +struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = { + .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, }; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 300faba9a18..e5cd8cacdce 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -821,6 +821,27 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) nfs4_recover_state(clp); } +static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) +{ + + set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + /* Don't recover state that expired before the reboot */ + if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) { + clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + return 0; + } + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + return 1; +} + +static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) +{ + set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); + clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); + return 1; +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { struct inode *inode = state->inode; @@ -869,6 +890,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * server that doesn't support a grace period. */ list_for_each_entry(state, &sp->so_states, open_states) { + if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) + continue; if (state->state == 0) continue; status = ops->recover_open(sp, state); @@ -888,8 +911,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __func__, status); case -ENOENT: - case -NFS4ERR_RECLAIM_BAD: - case -NFS4ERR_RECLAIM_CONFLICT: + case -ESTALE: /* * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. @@ -899,8 +921,13 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs /* Mark the file as being 'closed' */ state->state = 0; break; + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + nfs4_state_mark_reclaim_nograce(sp->so_client, state); + break; case -NFS4ERR_EXPIRED: case -NFS4ERR_NO_GRACE: + nfs4_state_mark_reclaim_nograce(sp->so_client, state); case -NFS4ERR_STALE_CLIENTID: goto out_err; } @@ -910,12 +937,26 @@ out_err: return status; } -static void nfs4_state_mark_reclaim(struct nfs_client *clp) +static void nfs4_clear_open_state(struct nfs4_state *state) +{ + struct nfs4_lock_state *lock; + + clear_bit(NFS_DELEGATED_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.counter = 0; + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } +} + +static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state)) { struct nfs4_state_owner *sp; struct rb_node *pos; struct nfs4_state *state; - struct nfs4_lock_state *lock; /* Reset all sequence ids to zero */ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { @@ -924,20 +965,60 @@ static void nfs4_state_mark_reclaim(struct nfs_client *clp) sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { - clear_bit(NFS_DELEGATED_STATE, &state->flags); - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_bit(NFS_O_RDWR_STATE, &state->flags); - list_for_each_entry(lock, &state->lock_states, ls_locks) { - lock->ls_seqid.counter = 0; - lock->ls_seqid.flags = 0; - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; - } + if (mark_reclaim(clp, state)) + nfs4_clear_open_state(state); } spin_unlock(&sp->so_lock); } } +static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) +{ + /* Mark all delegations for reclaim */ + nfs_delegation_mark_reclaim(clp); + nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); +} + +static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) +{ + struct nfs4_state_owner *sp; + struct rb_node *pos; + struct nfs4_state *state; + + if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + return; + + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { + sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + spin_lock(&sp->so_lock); + list_for_each_entry(state, &sp->so_states, open_states) { + if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags)) + continue; + nfs4_state_mark_reclaim_nograce(clp, state); + } + spin_unlock(&sp->so_lock); + } + + nfs_delegation_reap_unclaimed(clp); +} + +static void nfs_delegation_clear_all(struct nfs_client *clp) +{ + nfs_delegation_mark_reclaim(clp); + nfs_delegation_reap_unclaimed(clp); +} + +static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) +{ + nfs_delegation_clear_all(clp); + nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); +} + +static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp) +{ + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); +} + static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) { struct rb_node *pos; @@ -964,11 +1045,25 @@ static int nfs4_check_lease(struct nfs_client *clp) /* Yes there are: try to renew the old lease */ status = nfs4_proc_renew(clp, cred); put_rpccred(cred); + switch (status) { + case -NFS4ERR_CB_PATH_DOWN: + set_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state); + break; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_reboot(clp); + break; + case -NFS4ERR_EXPIRED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); + } return status; } /* "reboot" to ensure we clear all state on the server */ clp->cl_boot_time = CURRENT_TIME; + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); return status; } @@ -993,7 +1088,6 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) static int reclaimer(void *ptr) { struct nfs_client *clp = ptr; - const struct nfs4_state_recovery_ops *ops; int status = 0; allow_signal(SIGKILL); @@ -1001,47 +1095,51 @@ static int reclaimer(void *ptr) /* Ensure exclusive access to NFSv4 state */ down_write(&clp->cl_sem); while (!list_empty(&clp->cl_superblocks)) { - ops = &nfs4_network_partition_recovery_ops; status = nfs4_check_lease(clp); - switch (status) { - case 0: - case -NFS4ERR_CB_PATH_DOWN: - goto out; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_LEASE_MOVED: - ops = &nfs4_reboot_recovery_ops; - } - /* We're going to have to re-establish a clientid */ - nfs4_state_mark_reclaim(clp); + if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + /* We're going to have to re-establish a clientid */ + status = nfs4_reclaim_lease(clp); + if (status) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + if (status == -EAGAIN) + continue; + goto out_error; + } + } - status = nfs4_reclaim_lease(clp); - if (status) { - if (status == -EAGAIN) + /* First recover reboot state... */ + if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { + /* Note: list is protected by exclusive lock on cl->cl_sem */ + status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops); + if (status == -NFS4ERR_STALE_CLIENTID) { + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); continue; - goto out_error; + } + nfs4_state_end_reclaim_reboot(clp); + continue; } - /* Mark all delegations for reclaim */ - nfs_delegation_mark_reclaim(clp); - /* Note: list is protected by exclusive lock on cl->cl_sem */ - status = nfs4_do_reclaim(clp, ops); - if (status < 0) { - if (status == -NFS4ERR_NO_GRACE) { - ops = &nfs4_network_partition_recovery_ops; - status = nfs4_do_reclaim(clp, ops); - } - if (status == -NFS4ERR_STALE_CLIENTID) - continue; - if (status == -NFS4ERR_EXPIRED) - continue; + /* Now recover expired state... */ + if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { + /* Note: list is protected by exclusive lock on cl->cl_sem */ + status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops); + if (status < 0) { + set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); + if (status == -NFS4ERR_STALE_CLIENTID) + continue; + if (status == -NFS4ERR_EXPIRED) + continue; + goto out_error; + } else + nfs4_state_end_reclaim_nograce(clp); + continue; } - nfs_delegation_reap_unclaimed(clp); break; } out: up_write(&clp->cl_sem); - if (status == -NFS4ERR_CB_PATH_DOWN) + if (test_and_clear_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state)) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); nfs_put_client(clp); @@ -1050,7 +1148,8 @@ out: out_error: printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + nfs4_state_end_reclaim_reboot(clp); goto out; } -- cgit v1.2.3 From e598d843c08a7ab6bdfa8098de49afb017fc6c6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:42 -0500 Subject: NFSv4: Remove redundant RENEW calls if we know the lease has expired Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 3 ++- fs/nfs/nfs4state.c | 47 +++++++++++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1c6fbd1cda9..76eda46b5a1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -39,6 +39,7 @@ struct idmap; enum nfs4_client_state { NFS4CLNT_STATE_RECOVER = 0, + NFS4CLNT_CHECK_LEASE, NFS4CLNT_LEASE_EXPIRED, NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 279ab36b5a6..780ba004b3d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -984,7 +984,8 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) ret = nfs4_wait_clnt_recover(server->client, clp); if (ret != 0) return ret; - if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && + !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) break; nfs4_schedule_state_recovery(clp); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e5cd8cacdce..3cc88a5d988 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -817,6 +817,8 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) { if (!clp) return; + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) nfs4_recover_state(clp); } @@ -1019,6 +1021,23 @@ static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp) clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } +static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) +{ + switch (error) { + case -NFS4ERR_CB_PATH_DOWN: + set_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state); + break; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_reboot(clp); + break; + case -NFS4ERR_EXPIRED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); + } +} + static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) { struct rb_node *pos; @@ -1031,6 +1050,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov if (status < 0) break; } + nfs4_recovery_handle_error(clp, status); return status; } @@ -1045,19 +1065,7 @@ static int nfs4_check_lease(struct nfs_client *clp) /* Yes there are: try to renew the old lease */ status = nfs4_proc_renew(clp, cred); put_rpccred(cred); - switch (status) { - case -NFS4ERR_CB_PATH_DOWN: - set_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state); - break; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_LEASE_MOVED: - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_reboot(clp); - break; - case -NFS4ERR_EXPIRED: - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_nograce(clp); - } + nfs4_recovery_handle_error(clp, status); return status; } @@ -1095,8 +1103,6 @@ static int reclaimer(void *ptr) /* Ensure exclusive access to NFSv4 state */ down_write(&clp->cl_sem); while (!list_empty(&clp->cl_superblocks)) { - status = nfs4_check_lease(clp); - if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); @@ -1106,16 +1112,21 @@ static int reclaimer(void *ptr) continue; goto out_error; } + clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + } + + if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { + status = nfs4_check_lease(clp); + if (status != 0) + continue; } /* First recover reboot state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { /* Note: list is protected by exclusive lock on cl->cl_sem */ status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops); - if (status == -NFS4ERR_STALE_CLIENTID) { - set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + if (status == -NFS4ERR_STALE_CLIENTID) continue; - } nfs4_state_end_reclaim_reboot(clp); continue; } -- cgit v1.2.3 From 0f605b56008c4b6b075217480c36ba395ca4eaa4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:42 -0500 Subject: NFSv4: Don't tell server we rebooted when not necessary Instead of doing a full setclientid, try doing a RENEW call first. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3cc88a5d988..a780518c5c3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1059,19 +1059,19 @@ static int nfs4_check_lease(struct nfs_client *clp) struct rpc_cred *cred; int status = -NFS4ERR_EXPIRED; - /* Are there any open files on this volume? */ + /* Is the client already known to have an expired lease? */ + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + return 0; cred = nfs4_get_renew_cred(clp); - if (cred != NULL) { - /* Yes there are: try to renew the old lease */ - status = nfs4_proc_renew(clp, cred); - put_rpccred(cred); - nfs4_recovery_handle_error(clp, status); - return status; + if (cred == NULL) { + cred = nfs4_get_setclientid_cred(clp); + if (cred == NULL) + goto out; } - - /* "reboot" to ensure we clear all state on the server */ - clp->cl_boot_time = CURRENT_TIME; - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + status = nfs4_proc_renew(clp, cred); + put_rpccred(cred); +out: + nfs4_recovery_handle_error(clp, status); return status; } -- cgit v1.2.3 From 7eff03aec917e17f733471d7e12c262c0c96409f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:43 -0500 Subject: NFSv4: Add a recovery marking scheme for state owners Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 7 +++++++ fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4state.c | 21 +++++++++++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 76eda46b5a1..a4e7b3feef8 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -94,12 +94,18 @@ struct nfs4_state_owner { spinlock_t so_lock; atomic_t so_count; + unsigned long so_flags; struct list_head so_states; struct list_head so_delegations; struct nfs_seqid_counter so_seqid; struct rpc_sequence so_sequence; }; +enum { + NFS_OWNER_RECLAIM_REBOOT, + NFS_OWNER_RECLAIM_NOGRACE +}; + /* * struct nfs4_state maintains the client-side state for a given * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). @@ -166,6 +172,7 @@ struct nfs4_exception { }; struct nfs4_state_recovery_ops { + int owner_flag_bit; int state_flag_bit; int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 780ba004b3d..019c8d67e14 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3690,12 +3690,14 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, } struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, }; struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a780518c5c3..7dcca23167d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -832,6 +832,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); return 0; } + set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags); set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); return 1; } @@ -840,6 +841,7 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s { set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); + set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags); set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); return 1; } @@ -1043,14 +1045,25 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov struct rb_node *pos; int status = 0; - /* Note: list is protected by exclusive lock on cl->cl_sem */ +restart: + spin_lock(&clp->cl_lock); for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); + if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags)) + continue; + atomic_inc(&sp->so_count); + spin_unlock(&clp->cl_lock); status = nfs4_reclaim_open_state(sp, ops); - if (status < 0) - break; + if (status < 0) { + set_bit(ops->owner_flag_bit, &sp->so_flags); + nfs4_put_state_owner(sp); + nfs4_recovery_handle_error(clp, status); + return status; + } + nfs4_put_state_owner(sp); + goto restart; } - nfs4_recovery_handle_error(clp, status); + spin_unlock(&clp->cl_lock); return status; } -- cgit v1.2.3 From fe1d81952e7f62b9da7dc438caaa07e35ec2b908 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:43 -0500 Subject: NFSv4: Ensure that nfs4_reclaim_open_state() doesn't depend on cl_sem Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7dcca23167d..beade5570b5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -893,11 +893,15 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * recovering after a network partition or a reboot from a * server that doesn't support a grace period. */ +restart: + spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) continue; if (state->state == 0) continue; + atomic_inc(&state->count); + spin_unlock(&sp->so_lock); status = ops->recover_open(sp, state); if (status >= 0) { status = nfs4_reclaim_locks(state, ops); @@ -907,7 +911,8 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs printk("%s: Lock reclaim failed!\n", __func__); } - continue; + nfs4_put_open_state(state); + goto restart; } } switch (status) { @@ -935,9 +940,13 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs case -NFS4ERR_STALE_CLIENTID: goto out_err; } + nfs4_put_open_state(state); + goto restart; } + spin_unlock(&sp->so_lock); return 0; out_err: + nfs4_put_open_state(state); return status; } -- cgit v1.2.3 From 65de872ed6501a68e918a49a5c2fa7fca9c6ce21 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:44 -0500 Subject: NFS: Remove the unnecessary argument to nfs4_wait_clnt_recover() ...and move some code around in order to clear out an unnecessary forward declaration. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 151 +++++++++++++++++++++++++++--------------------------- 1 file changed, 75 insertions(+), 76 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 019c8d67e14..660c5dcfb0a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -63,8 +63,6 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); -static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -195,6 +193,80 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent kunmap_atomic(start, KM_USER0); } +static int nfs4_wait_bit_killable(void *word) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + +static int nfs4_wait_clnt_recover(struct nfs_client *clp) +{ + int res; + + might_sleep(); + + rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_); + + res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER, + nfs4_wait_bit_killable, TASK_KILLABLE); + + rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_); + return res; +} + +static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) +{ + int res = 0; + + might_sleep(); + + if (*timeout <= 0) + *timeout = NFS4_POLL_RETRY_MIN; + if (*timeout > NFS4_POLL_RETRY_MAX) + *timeout = NFS4_POLL_RETRY_MAX; + schedule_timeout_killable(*timeout); + if (fatal_signal_pending(current)) + res = -ERESTARTSYS; + *timeout <<= 1; + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret = errorcode; + + exception->retry = 0; + switch(errorcode) { + case 0: + return 0; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(clp); + ret = nfs4_wait_clnt_recover(clp); + if (ret == 0) + exception->retry = 1; + break; + case -NFS4ERR_FILE_OPEN: + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client, &exception->timeout); + if (ret != 0) + break; + case -NFS4ERR_OLD_STATEID: + exception->retry = 1; + } + /* We failed to handle the error */ + return nfs4_map_errors(ret); +} + + static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { struct nfs_client *clp = server->nfs_client; @@ -981,7 +1053,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) int ret; for (;;) { - ret = nfs4_wait_clnt_recover(server->client, clp); + ret = nfs4_wait_clnt_recover(clp); if (ret != 0) return ret; if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && @@ -2799,79 +2871,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) return 0; } -static int nfs4_wait_bit_killable(void *word) -{ - if (fatal_signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - -static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp) -{ - int res; - - might_sleep(); - - rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_); - - res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER, - nfs4_wait_bit_killable, TASK_KILLABLE); - - rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_); - return res; -} - -static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) -{ - int res = 0; - - might_sleep(); - - if (*timeout <= 0) - *timeout = NFS4_POLL_RETRY_MIN; - if (*timeout > NFS4_POLL_RETRY_MAX) - *timeout = NFS4_POLL_RETRY_MAX; - schedule_timeout_killable(*timeout); - if (fatal_signal_pending(current)) - res = -ERESTARTSYS; - *timeout <<= 1; - return res; -} - -/* This is the error handling routine for processes that are allowed - * to sleep. - */ -static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) -{ - struct nfs_client *clp = server->nfs_client; - int ret = errorcode; - - exception->retry = 0; - switch(errorcode) { - case 0: - return 0; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(clp); - ret = nfs4_wait_clnt_recover(server->client, clp); - if (ret == 0) - exception->retry = 1; - break; - case -NFS4ERR_FILE_OPEN: - case -NFS4ERR_GRACE: - case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client, &exception->timeout); - if (ret != 0) - break; - case -NFS4ERR_OLD_STATEID: - exception->retry = 1; - } - /* We failed to handle the error */ - return nfs4_map_errors(ret); -} - int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; -- cgit v1.2.3 From 19e03c570e6099ffaf24e5628d4fe1a8acbe820d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:44 -0500 Subject: NFSv4: Ensure that file unlock requests don't conflict with state recovery The unlock path is currently failing to take the nfs_client->cl_sem read lock, and hence the recovery path may see locks disappear from underneath it. Also ensure that it takes the nfs_inode->rwsem read lock so that it there is no conflict with delegation recalls. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 ++++++++++++++++---------- fs/nfs/nfs4state.c | 4 ++++ 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 660c5dcfb0a..aec4e47c462 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3273,6 +3273,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { + struct nfs_client *clp = state->owner->so_client; + struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; struct rpc_task *task; @@ -3282,8 +3284,15 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = nfs4_set_lock_state(state, request); /* Unlock _before_ we do the RPC call */ request->fl_flags |= FL_EXISTS; - if (do_vfs_lock(request->fl_file, request) == -ENOENT) + down_read(&clp->cl_sem); + down_read(&nfsi->rwsem); + if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); goto out; + } + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); if (status != 0) goto out; /* Is this a delegated lock? */ @@ -3510,6 +3519,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs_client *clp = state->owner->so_client; + struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; int status; @@ -3522,18 +3532,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (status < 0) goto out; down_read(&clp->cl_sem); + down_read(&nfsi->rwsem); if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - struct nfs_inode *nfsi = NFS_I(state->inode); /* Yes: cache locks! */ - down_read(&nfsi->rwsem); /* ...but avoid races with delegation recall... */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); - up_read(&nfsi->rwsem); - goto out_unlock; - } - up_read(&nfsi->rwsem); + request->fl_flags = fl_flags & ~FL_SLEEP; + status = do_vfs_lock(request->fl_file, request); + goto out_unlock; } status = _nfs4_do_setlk(state, cmd, request, 0); if (status != 0) @@ -3543,6 +3548,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock if (do_vfs_lock(request->fl_file, request) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); out_unlock: + up_read(&nfsi->rwsem); up_read(&clp->cl_sem); out: request->fl_flags = fl_flags; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index beade5570b5..16c9fbdf97b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -849,9 +849,11 @@ static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_s static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); struct file_lock *fl; int status = 0; + down_write(&nfsi->rwsem); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; @@ -874,8 +876,10 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ goto out_err; } } + up_write(&nfsi->rwsem); return 0; out_err: + up_write(&nfsi->rwsem); return status; } -- cgit v1.2.3 From 95d35cb4c473c754824967c0b069bbeb7efa4847 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:45 -0500 Subject: NFSv4: Remove nfs_client->cl_sem Now that we're using the flags to indicate state that needs to be recovered, as well as having implemented proper refcounting and spinlocking on the state and open_owners, we can get rid of nfs_client->cl_sem. The only remaining case that was dubious was the file locking, and that case is now covered by the nfsi->rwsem. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/delegation.c | 5 ----- fs/nfs/nfs4proc.c | 20 +------------------- fs/nfs/nfs4renewd.c | 3 --- fs/nfs/nfs4state.c | 17 ----------------- 5 files changed, 1 insertion(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 70b6d9e8517..b643f0ec5c2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -143,7 +143,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_proto = cl_init->proto; #ifdef CONFIG_NFS_V4 - init_rwsem(&clp->cl_sem); INIT_LIST_HEAD(&clp->cl_delegations); spin_lock_init(&clp->cl_lock); INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 646ba3e75a1..ebc06f2da31 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -243,16 +243,13 @@ static void nfs_msync_inode(struct inode *inode) */ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); nfs_msync_inode(inode); - down_read(&clp->cl_sem); /* Guard against new delegated open calls */ down_write(&nfsi->rwsem); nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); - up_read(&clp->cl_sem); nfs_msync_inode(inode); return nfs_do_return_delegation(inode, delegation, 1); @@ -425,7 +422,6 @@ static int recall_thread(void *data) daemonize("nfsv4-delegreturn"); nfs_msync_inode(inode); - down_read(&clp->cl_sem); down_write(&nfsi->rwsem); spin_lock(&clp->cl_lock); delegation = nfs_detach_delegation_locked(nfsi, args->stateid); @@ -437,7 +433,6 @@ static int recall_thread(void *data) complete(&args->started); nfs_delegation_claim_opens(inode, args->stateid); up_write(&nfsi->rwsem); - up_read(&clp->cl_sem); nfs_msync_inode(inode); if (delegation != NULL) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aec4e47c462..26dcd77abb0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -207,12 +207,8 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); - rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_); - res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER, nfs4_wait_bit_killable, TASK_KILLABLE); - - rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_); return res; } @@ -1135,7 +1131,6 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); - struct nfs_client *clp = server->nfs_client; struct nfs4_opendata *opendata; int status; @@ -1150,11 +1145,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct goto err_put_state_owner; if (path->dentry->d_inode != NULL) nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); - down_read(&clp->cl_sem); status = -ENOMEM; opendata = nfs4_opendata_alloc(path, sp, flags, sattr); if (opendata == NULL) - goto err_release_rwsem; + goto err_put_state_owner; if (path->dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); @@ -1172,13 +1166,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct goto err_opendata_put; nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); - up_read(&clp->cl_sem); *res = state; return 0; err_opendata_put: nfs4_opendata_put(opendata); -err_release_rwsem: - up_read(&clp->cl_sem); err_put_state_owner: nfs4_put_state_owner(sp); out_err: @@ -3099,7 +3090,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock struct nfs4_lock_state *lsp; int status; - down_read(&clp->cl_sem); arg.lock_owner.clientid = clp->cl_clientid; status = nfs4_set_lock_state(state, request); if (status != 0) @@ -3116,7 +3106,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock } request->fl_ops->fl_release_private(request); out: - up_read(&clp->cl_sem); return status; } @@ -3273,7 +3262,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs_client *clp = state->owner->so_client; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; @@ -3284,15 +3272,12 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = nfs4_set_lock_state(state, request); /* Unlock _before_ we do the RPC call */ request->fl_flags |= FL_EXISTS; - down_read(&clp->cl_sem); down_read(&nfsi->rwsem); if (do_vfs_lock(request->fl_file, request) == -ENOENT) { up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); goto out; } up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); if (status != 0) goto out; /* Is this a delegated lock? */ @@ -3518,7 +3503,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs_client *clp = state->owner->so_client; struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; int status; @@ -3531,7 +3515,6 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock status = do_vfs_lock(request->fl_file, request); if (status < 0) goto out; - down_read(&clp->cl_sem); down_read(&nfsi->rwsem); if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { /* Yes: cache locks! */ @@ -3549,7 +3532,6 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); out_unlock: up_read(&nfsi->rwsem); - up_read(&clp->cl_sem); out: request->fl_flags = fl_flags; return status; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 9fe8640a88e..6101f955f23 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -65,7 +65,6 @@ nfs4_renew_state(struct work_struct *work) long lease, timeout; unsigned long last, now; - down_read(&clp->cl_sem); dprintk("%s: start\n", __func__); /* Are there any active superblocks? */ if (list_empty(&clp->cl_superblocks)) @@ -101,11 +100,9 @@ nfs4_renew_state(struct work_struct *work) schedule_delayed_work(&clp->cl_renewd, timeout); spin_unlock(&clp->cl_lock); out: - up_read(&clp->cl_sem); dprintk("%s: done\n", __func__); } -/* Must be called with clp->cl_sem locked for writes */ void nfs4_schedule_state_renewal(struct nfs_client *clp) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 16c9fbdf97b..ec0cbe00a80 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -305,10 +305,6 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) } } -/* - * Note: must be called with clp->cl_sem held in order to prevent races - * with reboot recovery! - */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { struct nfs_client *clp = server->nfs_client; @@ -337,10 +333,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct return sp; } -/* - * Must be called with clp->cl_sem held in order to avoid races - * with state recovery... - */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { struct nfs_client *clp = sp->so_client; @@ -442,10 +434,6 @@ out: return state; } -/* - * Beware! Caller must be holding exactly one - * reference to clp->cl_sem! - */ void nfs4_put_open_state(struct nfs4_state *state) { struct inode *inode = state->inode; @@ -578,7 +566,6 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding clp->cl_sem */ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { @@ -1127,7 +1114,6 @@ static int reclaimer(void *ptr) allow_signal(SIGKILL); /* Ensure exclusive access to NFSv4 state */ - down_write(&clp->cl_sem); while (!list_empty(&clp->cl_superblocks)) { if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { /* We're going to have to re-establish a clientid */ @@ -1149,7 +1135,6 @@ static int reclaimer(void *ptr) /* First recover reboot state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { - /* Note: list is protected by exclusive lock on cl->cl_sem */ status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops); if (status == -NFS4ERR_STALE_CLIENTID) continue; @@ -1159,7 +1144,6 @@ static int reclaimer(void *ptr) /* Now recover expired state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { - /* Note: list is protected by exclusive lock on cl->cl_sem */ status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops); if (status < 0) { set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); @@ -1175,7 +1159,6 @@ static int reclaimer(void *ptr) break; } out: - up_write(&clp->cl_sem); if (test_and_clear_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state)) nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); -- cgit v1.2.3 From 9e33bed55239bdcee1746c31a11177d239bac1b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:46 -0500 Subject: NFSv4: Add recovery for individual stateids NFSv4 defines a number of state errors which the client does not currently handle. Among those we should worry about are: NFS4ERR_ADMIN_REVOKED - the server's administrator revoked our locks and/or delegations. NFS4ERR_BAD_STATEID - the client and server are out of sync, possibly due to a delegation return racing with an OPEN request. NFS4ERR_OPENMODE - the client attempted to do something not sanctioned by the open mode of the stateid. Should normally just occur as a result of a delegation return race. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 37 ++++++++++++++++++++++++++++--------- fs/nfs/nfs4state.c | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a4e7b3feef8..46cbc0cdf88 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -169,6 +169,7 @@ struct nfs4_state { struct nfs4_exception { long timeout; int retry; + struct nfs4_state *state; }; struct nfs4_state_recovery_ops { @@ -226,6 +227,7 @@ extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); +extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26dcd77abb0..8a2dee9caa6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -62,7 +62,7 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); +static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -235,12 +235,19 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; + struct nfs4_state *state = exception->state; int ret = errorcode; exception->retry = 0; switch(errorcode) { case 0: return 0; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + nfs4_state_mark_reclaim_nograce(clp, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -1320,10 +1327,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data) renew_lease(server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_BAD_STATEID: case -NFS4ERR_EXPIRED: - break; + if (calldata->arg.open_flags == 0) + break; default: - if (nfs4_async_handle_error(task, server) == -EAGAIN) { + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { rpc_restart_call(task); return; } @@ -1418,6 +1428,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) goto out_free_calldata; + calldata->arg.open_flags = 0; calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; @@ -2064,7 +2075,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { struct nfs_removeres *res = task->tk_msg.rpc_resp; - if (nfs4_async_handle_error(task, res->server) == -EAGAIN) + if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) return 0; update_changeattr(dir, &res->cinfo); nfs_post_op_update_inode(dir, &res->dir_attr); @@ -2492,7 +2503,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); - if (nfs4_async_handle_error(task, server) == -EAGAIN) { + if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { rpc_restart_call(task); return -EAGAIN; } @@ -2513,7 +2524,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; - if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { rpc_restart_call(task); return -EAGAIN; } @@ -2539,7 +2550,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; - if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { rpc_restart_call(task); return -EAGAIN; } @@ -2832,13 +2843,19 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen } static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) { struct nfs_client *clp = server->nfs_client; if (!clp || task->tk_status >= 0) return 0; switch(task->tk_status) { + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + nfs4_state_mark_reclaim_nograce(clp, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -3195,11 +3212,13 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) sizeof(calldata->lsp->ls_stateid.data)); renew_lease(calldata->server, calldata->timestamp); break; + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: break; default: - if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) + if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) rpc_restart_call(task); } } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ec0cbe00a80..3d78706b3ef 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -824,7 +824,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st return 1; } -static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) +int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) { set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); -- cgit v1.2.3 From 515d86117724abe39d7d57d7ccc7cc5c44480529 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:46 -0500 Subject: NFSv4: Clean up the support for returning multiple delegations Add a flag to mark delegations as requiring return, then run a garbage collector. In the future, this will allow for more flexible delegation management, where delegations may be marked for return if it turns out that they are not being referenced. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 97 +++++++++++++++++++++++++---------------------------- fs/nfs/delegation.h | 8 +++-- fs/nfs/super.c | 2 +- 3 files changed, 52 insertions(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ebc06f2da31..bc9d4f9a788 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -255,6 +255,34 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat return nfs_do_return_delegation(inode, delegation, 1); } +/* + * Return all delegations that have been marked for return + */ +static void nfs_client_return_marked_delegations(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + struct inode *inode; + +restart: + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) + continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; + spin_lock(&clp->cl_lock); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + spin_unlock(&clp->cl_lock); + rcu_read_unlock(); + if (delegation != NULL) + __nfs_inode_return_delegation(inode, delegation); + iput(inode); + goto restart; + } + rcu_read_unlock(); +} + /* * This function returns the delegation without reclaiming opens * or protecting against delegation reclaims. @@ -296,63 +324,46 @@ int nfs_inode_return_delegation(struct inode *inode) /* * Return all delegations associated to a super block */ -void nfs_return_all_delegations(struct super_block *sb) +void nfs_super_return_all_delegations(struct super_block *sb) { struct nfs_client *clp = NFS_SB(sb)->nfs_client; struct nfs_delegation *delegation; - struct inode *inode; if (clp == NULL) return; -restart: rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - inode = NULL; spin_lock(&delegation->lock); if (delegation->inode != NULL && delegation->inode->i_sb == sb) - inode = igrab(delegation->inode); + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); spin_unlock(&delegation->lock); - if (inode == NULL) - continue; - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); - spin_unlock(&clp->cl_lock); - rcu_read_unlock(); - if (delegation != NULL) - __nfs_inode_return_delegation(inode, delegation); - iput(inode); - goto restart; } rcu_read_unlock(); + nfs_client_return_marked_delegations(clp); +} + +static void nfs_client_return_all_delegations(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + rcu_read_unlock(); + nfs_client_return_marked_delegations(clp); } static int nfs_do_expire_all_delegations(void *ptr) { struct nfs_client *clp = ptr; - struct nfs_delegation *delegation; - struct inode *inode; allow_signal(SIGKILL); -restart: + if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) goto out; if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) goto out; - rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) - continue; - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); - spin_unlock(&clp->cl_lock); - rcu_read_unlock(); - if (delegation) - __nfs_inode_return_delegation(inode, delegation); - iput(inode); - goto restart; - } - rcu_read_unlock(); + nfs_client_return_all_delegations(clp); out: nfs_put_client(clp); module_put_and_exit(0); @@ -379,27 +390,9 @@ void nfs_expire_all_delegations(struct nfs_client *clp) */ void nfs_handle_cb_pathdown(struct nfs_client *clp) { - struct nfs_delegation *delegation; - struct inode *inode; - if (clp == NULL) return; -restart: - rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) - continue; - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); - spin_unlock(&clp->cl_lock); - rcu_read_unlock(); - if (delegation != NULL) - __nfs_inode_return_delegation(inode, delegation); - iput(inode); - goto restart; - } - rcu_read_unlock(); + nfs_client_return_all_delegations(clp); } struct recall_threadargs { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5e9f40e0a7d..c772bab12ee 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -20,12 +20,16 @@ struct nfs_delegation { int type; loff_t maxsize; __u64 change_attr; -#define NFS_DELEGATION_NEED_RECLAIM 0 unsigned long flags; spinlock_t lock; struct rcu_head rcu; }; +enum { + NFS_DELEGATION_NEED_RECLAIM = 0, + NFS_DELEGATION_RETURN, +}; + int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); int nfs_inode_return_delegation(struct inode *inode); @@ -33,7 +37,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); -void nfs_return_all_delegations(struct super_block *sb); +void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d8e062fe76b..bd4c3dd550d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2433,7 +2433,7 @@ static void nfs4_kill_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); - nfs_return_all_delegations(sb); + nfs_super_return_all_delegations(sb); kill_anon_super(sb); nfs4_renewd_prepare_shutdown(server); -- cgit v1.2.3 From 707fb4b324371f1b4bea5eb29e39d265c66086ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:47 -0500 Subject: NFSv4: Clean up NFS4ERR_CB_PATH_DOWN error management... Add a delegation cleanup phase to the state management loop, and do the NFS4ERR_CB_PATH_DOWN recovery there. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 14 ++++++++------ fs/nfs/delegation.h | 1 + fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4state.c | 9 ++++++--- 4 files changed, 16 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bc9d4f9a788..ff2c1585d23 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -258,7 +258,7 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat /* * Return all delegations that have been marked for return */ -static void nfs_client_return_marked_delegations(struct nfs_client *clp) +void nfs_client_return_marked_delegations(struct nfs_client *clp) { struct nfs_delegation *delegation; struct inode *inode; @@ -342,15 +342,16 @@ void nfs_super_return_all_delegations(struct super_block *sb) nfs_client_return_marked_delegations(clp); } -static void nfs_client_return_all_delegations(struct nfs_client *clp) +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) { struct nfs_delegation *delegation; rcu_read_lock(); - list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + } rcu_read_unlock(); - nfs_client_return_marked_delegations(clp); } static int nfs_do_expire_all_delegations(void *ptr) @@ -363,7 +364,8 @@ static int nfs_do_expire_all_delegations(void *ptr) goto out; if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) goto out; - nfs_client_return_all_delegations(clp); + nfs_client_mark_return_all_delegations(clp); + nfs_client_return_marked_delegations(clp); out: nfs_put_client(clp); module_put_and_exit(0); @@ -392,7 +394,7 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) { if (clp == NULL) return; - nfs_client_return_all_delegations(clp); + nfs_client_mark_return_all_delegations(clp); } struct recall_threadargs { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index c772bab12ee..56f3eb558ef 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -40,6 +40,7 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); +void nfs_client_return_marked_delegations(struct nfs_client *clp); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 46cbc0cdf88..84a4f5223f6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -43,7 +43,7 @@ enum nfs4_client_state { NFS4CLNT_LEASE_EXPIRED, NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, - NFS4CLNT_CB_PATH_DOWN, + NFS4CLNT_DELEGRETURN, }; /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3d78706b3ef..2894502b94d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1027,7 +1027,7 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { case -NFS4ERR_CB_PATH_DOWN: - set_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state); + nfs_handle_cb_pathdown(clp); break; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_LEASE_MOVED: @@ -1156,11 +1156,14 @@ static int reclaimer(void *ptr) nfs4_state_end_reclaim_nograce(clp); continue; } + + if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { + nfs_client_return_marked_delegations(clp); + continue; + } break; } out: - if (test_and_clear_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state)) - nfs_handle_cb_pathdown(clp); nfs4_clear_recover_bit(clp); nfs_put_client(clp); module_put_and_exit(0); -- cgit v1.2.3 From e005e8041c132af9f70862e1387a222198f95e7f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:48 -0500 Subject: NFSv4: Rename the state reclaimer thread It is really a more general purpose state management thread at this point. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/nfs4state.c | 49 ++++++++++++++++++++++++++++--------------------- 4 files changed, 32 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ff2c1585d23..4692fdc8abf 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -360,7 +360,7 @@ static int nfs_do_expire_all_delegations(void *ptr) allow_signal(SIGKILL); - if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0) + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) goto out; if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) goto out; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 84a4f5223f6..b7a12e7c660 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -38,7 +38,7 @@ struct idmap; ((err) != NFSERR_NOFILEHANDLE)) enum nfs4_client_state { - NFS4CLNT_STATE_RECOVER = 0, + NFS4CLNT_MANAGER_RUNNING = 0, NFS4CLNT_CHECK_LEASE, NFS4CLNT_LEASE_EXPIRED, NFS4CLNT_RECLAIM_REBOOT, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8a2dee9caa6..2a347d47e38 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -207,7 +207,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); - res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER, + res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs4_wait_bit_killable, TASK_KILLABLE); return res; } @@ -2861,7 +2861,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_EXPIRED: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); nfs4_schedule_state_recovery(clp); - if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; return -EAGAIN; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2894502b94d..83f3dd39ac5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -767,32 +767,34 @@ unlock: return status; } -static int reclaimer(void *); +static int nfs4_run_state_manager(void *); -static inline void nfs4_clear_recover_bit(struct nfs_client *clp) +static void nfs4_clear_state_manager_bit(struct nfs_client *clp) { smp_mb__before_clear_bit(); - clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); smp_mb__after_clear_bit(); - wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER); + wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING); rpc_wake_up(&clp->cl_rpcwaitq); } /* - * State recovery routine + * Schedule the nfs_client asynchronous state management routine */ -static void nfs4_recover_state(struct nfs_client *clp) +static void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); - task = kthread_run(reclaimer, clp, "%s-reclaim", + task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); if (!IS_ERR(task)) return; - nfs4_clear_recover_bit(clp); + nfs4_clear_state_manager_bit(clp); nfs_put_client(clp); module_put(THIS_MODULE); } @@ -806,8 +808,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp) return; if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) - nfs4_recover_state(clp); + nfs4_schedule_state_manager(clp); } static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) @@ -1106,13 +1107,10 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) return status; } -static int reclaimer(void *ptr) +static void nfs4_state_manager(struct nfs_client *clp) { - struct nfs_client *clp = ptr; int status = 0; - allow_signal(SIGKILL); - /* Ensure exclusive access to NFSv4 state */ while (!list_empty(&clp->cl_superblocks)) { if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { @@ -1161,19 +1159,28 @@ static int reclaimer(void *ptr) nfs_client_return_marked_delegations(clp); continue; } + + nfs4_clear_state_manager_bit(clp); break; } -out: - nfs4_clear_recover_bit(clp); - nfs_put_client(clp); - module_put_and_exit(0); - return 0; + return; out_error: - printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s" + printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) nfs4_state_end_reclaim_reboot(clp); - goto out; + nfs4_clear_state_manager_bit(clp); +} + +static int nfs4_run_state_manager(void *ptr) +{ + struct nfs_client *clp = ptr; + + allow_signal(SIGKILL); + nfs4_state_manager(clp); + nfs_put_client(clp); + module_put_and_exit(0); + return 0; } /* -- cgit v1.2.3 From f3c76491e7ecacbb7942633f3b2a3514b7476ef9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:48 -0500 Subject: NFSv4: Don't exit the state management if there are still tasks to do Fix up a potential race... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 83f3dd39ac5..9e76712dcae 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1112,7 +1112,7 @@ static void nfs4_state_manager(struct nfs_client *clp) int status = 0; /* Ensure exclusive access to NFSv4 state */ - while (!list_empty(&clp->cl_superblocks)) { + for(;;) { if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); @@ -1161,7 +1161,11 @@ static void nfs4_state_manager(struct nfs_client *clp) } nfs4_clear_state_manager_bit(clp); - break; + /* Did we race with an attempt to give us more work? */ + if (clp->cl_state == 0) + break; + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + break; } return; out_error: -- cgit v1.2.3 From 0d62f85a81216f30a0ba1479b93e84103a5d535b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:49 -0500 Subject: NFSv4: Fix a BAD_SEQUENCEID condition. We really shouldn't be resetting the sequence ids when doing state expiration recovery, since we don't know if the server still remembers our previous state owners. There are servers out there that do attempt to preserve client state even if the lease has expired. Such a server would only release that state if a conflicting OPEN request occurs. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9e76712dcae..6dc36a0d9a8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -951,7 +951,6 @@ static void nfs4_clear_open_state(struct nfs4_state *state) clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_bit(NFS_O_RDWR_STATE, &state->flags); list_for_each_entry(lock, &state->lock_states, ls_locks) { - lock->ls_seqid.counter = 0; lock->ls_seqid.flags = 0; lock->ls_flags &= ~NFS_LOCK_INITIALIZED; } @@ -966,7 +965,6 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_re /* Reset all sequence ids to zero */ for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); - sp->so_seqid.counter = 0; sp->so_seqid.flags = 0; spin_lock(&sp->so_lock); list_for_each_entry(state, &sp->so_states, open_states) { -- cgit v1.2.3 From b0d3ded1a21dc3057daff5a488469d9e6aa1b567 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:50 -0500 Subject: NFSv4: Clean up nfs_expire_all_delegations() Let the actual delegreturn stuff be run in the state manager thread rather than allocating a separate kthread. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 31 +++++-------------------------- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4renewd.c | 16 +++++++++------- fs/nfs/nfs4state.c | 2 +- 4 files changed, 16 insertions(+), 34 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 4692fdc8abf..21eda6c083d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -354,37 +354,16 @@ static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) rcu_read_unlock(); } -static int nfs_do_expire_all_delegations(void *ptr) +static void nfs_delegation_run_state_manager(struct nfs_client *clp) { - struct nfs_client *clp = ptr; - - allow_signal(SIGKILL); - - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) - goto out; - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) - goto out; - nfs_client_mark_return_all_delegations(clp); - nfs_client_return_marked_delegations(clp); -out: - nfs_put_client(clp); - module_put_and_exit(0); + if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) + nfs4_schedule_state_manager(clp); } void nfs_expire_all_delegations(struct nfs_client *clp) { - struct task_struct *task; - - __module_get(THIS_MODULE); - atomic_inc(&clp->cl_count); - task = kthread_run(nfs_do_expire_all_delegations, clp, - "%s-delegreturn", - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)); - if (!IS_ERR(task)) - return; - nfs_put_client(clp); - module_put(THIS_MODULE); + nfs_client_mark_return_all_delegations(clp); + nfs_delegation_run_state_manager(clp); } /* diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b7a12e7c660..ee5f51ef696 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -227,6 +227,7 @@ extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); +extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6101f955f23..ca557e677d9 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -77,16 +77,18 @@ nfs4_renew_state(struct work_struct *work) /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { cred = nfs4_get_renew_cred_locked(clp); + spin_unlock(&clp->cl_lock); if (cred == NULL) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - spin_unlock(&clp->cl_lock); + if (list_empty(&clp->cl_delegations)) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + goto out; + } nfs_expire_all_delegations(clp); - goto out; + } else { + /* Queue an asynchronous RENEW. */ + nfs4_proc_async_renew(clp, cred); + put_rpccred(cred); } - spin_unlock(&clp->cl_lock); - /* Queue an asynchronous RENEW. */ - nfs4_proc_async_renew(clp, cred); - put_rpccred(cred); timeout = (2 * lease) / 3; spin_lock(&clp->cl_lock); } else diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6dc36a0d9a8..cd16b301f13 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -781,7 +781,7 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp) /* * Schedule the nfs_client asynchronous state management routine */ -static void nfs4_schedule_state_manager(struct nfs_client *clp) +void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; -- cgit v1.2.3 From 6411bd4a471893ab2af103d96253ba97c92d4777 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:51 -0500 Subject: NFSv4: Clean up the asynchronous delegation return Reuse the state management thread in order to return delegations when we get a callback. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 73 ++++++++++++++--------------------------------------- 1 file changed, 19 insertions(+), 54 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 21eda6c083d..00c350c031b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -321,6 +321,12 @@ int nfs_inode_return_delegation(struct inode *inode) return err; } +static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); +} + /* * Return all delegations associated to a super block */ @@ -376,66 +382,25 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) nfs_client_mark_return_all_delegations(clp); } -struct recall_threadargs { - struct inode *inode; - struct nfs_client *clp; - const nfs4_stateid *stateid; - - struct completion started; - int result; -}; - -static int recall_thread(void *data) -{ - struct recall_threadargs *args = (struct recall_threadargs *)data; - struct inode *inode = igrab(args->inode); - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_delegation *delegation; - - daemonize("nfsv4-delegreturn"); - - nfs_msync_inode(inode); - down_write(&nfsi->rwsem); - spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, args->stateid); - if (delegation != NULL) - args->result = 0; - else - args->result = -ENOENT; - spin_unlock(&clp->cl_lock); - complete(&args->started); - nfs_delegation_claim_opens(inode, args->stateid); - up_write(&nfsi->rwsem); - nfs_msync_inode(inode); - - if (delegation != NULL) - nfs_do_return_delegation(inode, delegation, 1); - iput(inode); - module_put_and_exit(0); -} - /* * Asynchronous delegation recall! */ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) { - struct recall_threadargs data = { - .inode = inode, - .stateid = stateid, - }; - int status; + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_delegation *delegation; - init_completion(&data.started); - __module_get(THIS_MODULE); - status = kernel_thread(recall_thread, &data, CLONE_KERNEL); - if (status < 0) - goto out_module_put; - wait_for_completion(&data.started); - return data.result; -out_module_put: - module_put(THIS_MODULE); - return status; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) { + rcu_read_unlock(); + return -ENOENT; + } + nfs_mark_return_delegation(clp, delegation); + rcu_read_unlock(); + nfs_delegation_run_state_manager(clp); + return 0; } /* -- cgit v1.2.3 From b7391f44f26b17ad25c7183a3d6ad50f0a9305ff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:52 -0500 Subject: NFSv4: Return unreferenced delegations more promptly If the client is not using a delegation, the right thing to do is to return it as soon as possible. This helps reduce the amount of state the server has to track, as well as reducing the potential for conflicts with other clients. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/nfs/delegation.h | 17 ++++------------- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4renewd.c | 1 + 4 files changed, 49 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 00c350c031b..e75f2f8c524 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -43,6 +43,27 @@ static void nfs_free_delegation(struct nfs_delegation *delegation) put_rpccred(cred); } +void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) +{ + set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); +} + +int nfs_have_delegation(struct inode *inode, int flags) +{ + struct nfs_delegation *delegation; + int ret = 0; + + flags &= FMODE_READ|FMODE_WRITE; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && (delegation->type & flags) == flags) { + nfs_mark_delegation_referenced(delegation); + ret = 1; + } + rcu_read_unlock(); + return ret; +} + static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) { struct inode *inode = state->inode; @@ -188,6 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation->change_attr = nfsi->change_attr; delegation->cred = get_rpccred(cred); delegation->inode = inode; + delegation->flags = 1<lock); spin_lock(&clp->cl_lock); @@ -382,6 +404,26 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp) nfs_client_mark_return_all_delegations(clp); } +static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp) +{ + struct nfs_delegation *delegation; + + rcu_read_lock(); + list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { + if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) + continue; + set_bit(NFS_DELEGATION_RETURN, &delegation->flags); + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + } + rcu_read_unlock(); +} + +void nfs_expire_unreferenced_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_unreferenced_delegations(clp); + nfs_delegation_run_state_manager(clp); +} + /* * Asynchronous delegation recall! */ diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 56f3eb558ef..2a74882e5d5 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -28,6 +28,7 @@ struct nfs_delegation { enum { NFS_DELEGATION_NEED_RECLAIM = 0, NFS_DELEGATION_RETURN, + NFS_DELEGATION_REFERENCED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -39,6 +40,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_expire_unreferenced_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); void nfs_client_return_marked_delegations(struct nfs_client *clp); @@ -51,19 +53,8 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); -static inline int nfs_have_delegation(struct inode *inode, int flags) -{ - struct nfs_delegation *delegation; - int ret = 0; - - flags &= FMODE_READ|FMODE_WRITE; - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation != NULL && (delegation->type & flags) == flags) - ret = 1; - rcu_read_unlock(); - return ret; -} +void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); +int nfs_have_delegation(struct inode *inode, int flags); #else static inline int nfs_have_delegation(struct inode *inode, int flags) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2a347d47e38..fc0c9d255cf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -421,6 +421,7 @@ static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_fla return 0; if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) return 0; + nfs_mark_delegation_referenced(delegation); return 1; } @@ -505,6 +506,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) goto no_delegation_unlock; + nfs_mark_delegation_referenced(deleg_cur); __update_open_stateid(state, open_stateid, &deleg_cur->stateid, open_flags); ret = 1; no_delegation_unlock: diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index ca557e677d9..f524e932ff7 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -101,6 +101,7 @@ nfs4_renew_state(struct work_struct *work) cancel_delayed_work(&clp->cl_renewd); schedule_delayed_work(&clp->cl_renewd, timeout); spin_unlock(&clp->cl_lock); + nfs_expire_unreferenced_delegations(clp); out: dprintk("%s: done\n", __func__); } -- cgit v1.2.3 From 9082a5cc1e33d081f091f54e6ed69a0628a4bdcc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:53 -0500 Subject: NFSv4: Fix up delegation callbacks Currently, the callback server is listening on IPv6 if it is enabled. This means that IPv4 addresses will always be mapped. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 57 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b643f0ec5c2..9b728f3565a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -223,31 +223,54 @@ void nfs_put_client(struct nfs_client *clp) } } -static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, - const struct sockaddr_in *sa2) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped) { - return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr; + switch (sa->sa_family) { + default: + return NULL; + case AF_INET6: + return &((const struct sockaddr_in6 *)sa)->sin6_addr; + break; + case AF_INET: + ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr, + addr_mapped); + return addr_mapped; + } } -static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1, - const struct sockaddr_in6 *sa2) +static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + const struct in6_addr *addr1; + const struct in6_addr *addr2; + struct in6_addr addr1_mapped; + struct in6_addr addr2_mapped; + + addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped); + if (likely(addr1 != NULL)) { + addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped); + if (likely(addr2 != NULL)) + return ipv6_addr_equal(addr1, addr2); + } + return 0; +} +#else +static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, + const struct sockaddr_in *sa2) { - return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr); + return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr; } static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, const struct sockaddr *sa2) { - switch (sa1->sa_family) { - case AF_INET: - return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, - (const struct sockaddr_in *)sa2); - case AF_INET6: - return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1, - (const struct sockaddr_in6 *)sa2); - } - BUG(); + if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET)) + return 0; + return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, + (const struct sockaddr_in *)sa2); } +#endif /* * Find a client by IP address and protocol version @@ -269,8 +292,6 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) if (clp->rpc_ops->version != nfsversion) continue; - if (addr->sa_family != clap->sa_family) - continue; /* Match only the IP address, not the port number */ if (!nfs_sockaddr_match_ipaddr(addr, clap)) continue; @@ -304,8 +325,6 @@ struct nfs_client *nfs_find_client_next(struct nfs_client *clp) if (clp->rpc_ops->version != nfsvers) continue; - if (sap->sa_family != clap->sa_family) - continue; /* Match only the IP address, not the port number */ if (!nfs_sockaddr_match_ipaddr(sap, clap)) continue; -- cgit v1.2.3 From bd7bf9d540c001055fba796ebf146d90e4dd2eb2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:53 -0500 Subject: NFSv4: Convert delegation->type field to fmode_t Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 6 +++--- fs/nfs/nfs4xdr.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index e75f2f8c524..968225a8801 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -48,7 +48,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } -int nfs_have_delegation(struct inode *inode, int flags) +int nfs_have_delegation(struct inode *inode, fmode_t flags) { struct nfs_delegation *delegation; int ret = 0; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 2a74882e5d5..09f38379517 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -17,7 +17,7 @@ struct nfs_delegation { struct rpc_cred *cred; struct inode *inode; nfs4_stateid stateid; - int type; + fmode_t type; loff_t maxsize; __u64 change_attr; unsigned long flags; @@ -54,10 +54,10 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); -int nfs_have_delegation(struct inode *inode, int flags); +int nfs_have_delegation(struct inode *inode, fmode_t flags); #else -static inline int nfs_have_delegation(struct inode *inode, int flags) +static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) { return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b916297d233..879911c9903 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1024,7 +1024,7 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a } } -static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type) +static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type) { __be32 *p; @@ -1053,7 +1053,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr * encode_string(xdr, name->len, name->name); } -static inline void encode_claim_previous(struct xdr_stream *xdr, int type) +static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type) { __be32 *p; -- cgit v1.2.3 From 5584c30630f8a4aac557093b1603e166fe7385be Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:54 -0500 Subject: NFSv4: Clean up is_atomic_open() Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3e64b98f3a9..121b533f1f2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -973,7 +973,7 @@ struct dentry_operations nfs4_dentry_operations = { * Use intent information to determine whether we need to substitute * the NFSv4-style stateful OPEN for the LOOKUP call */ -static int is_atomic_open(struct inode *dir, struct nameidata *nd) +static int is_atomic_open(struct nameidata *nd) { if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) return 0; @@ -996,7 +996,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); /* Check that we are indeed trying to open this file */ - if (!is_atomic_open(dir, nd)) + if (!is_atomic_open(nd)) goto no_open; if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { @@ -1047,10 +1047,10 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *dir; int openflags, ret = 0; + if (!is_atomic_open(nd)) + goto no_open; parent = dget_parent(dentry); dir = parent->d_inode; - if (!is_atomic_open(dir, nd)) - goto no_open; /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ @@ -1062,11 +1062,11 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) - goto no_open; + goto no_open_dput; openflags = nd->intent.open.flags; /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) - goto no_open; + goto no_open_dput; /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_TRUNC); @@ -1081,8 +1081,9 @@ out: if (!ret) d_drop(dentry); return ret; -no_open: +no_open_dput: dput(parent); +no_open: if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) return 1; return nfs_lookup_revalidate(dentry, nd); -- cgit v1.2.3 From 15860ab1d7700249ebe3b0b8ca86ce43dfd0d66f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:54 -0500 Subject: NFSv4: Ensure that we set the verifier when revalidating delegated dentries This ensures that we don't have to look up the dentry again after we return the delegation if we know that the directory didn't change. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 121b533f1f2..ff167aa6243 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -799,6 +799,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) goto out_bad; } + if (nfs_have_delegation(inode, FMODE_READ)) + goto out_set_verifier; + /* Force a full look up iff the parent directory has changed */ if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, nd)) @@ -817,6 +820,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; +out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: dput(parent); @@ -1084,8 +1088,6 @@ out: no_open_dput: dput(parent); no_open: - if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) - return 1; return nfs_lookup_revalidate(dentry, nd); } #endif /* CONFIG_NFSV4 */ -- cgit v1.2.3 From 7a50c60e461f6ff97428da9448c3dad5b7bef491 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:55 -0500 Subject: NFS: Use delegations to optimise ACCESS calls Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ff167aa6243..ed7024c3488 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1797,7 +1797,8 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + if (!nfs_have_delegation(inode, FMODE_READ) && + !time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; -- cgit v1.2.3 From dc0b027dfadfcb8a5504f7d8052754bf8d501ab9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 15:21:56 -0500 Subject: NFSv4: Convert the open and close ops to use fmode Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/nfs4_fs.h | 8 ++-- fs/nfs/nfs4proc.c | 126 +++++++++++++++++++++++++++++------------------------ fs/nfs/nfs4state.c | 24 +++++----- fs/nfs/nfs4xdr.c | 10 ++--- 5 files changed, 90 insertions(+), 80 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d22eb383e1c..1cf39202c3e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -592,7 +592,7 @@ static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context /* * Given an inode, search for an open context with the desired characteristics */ -struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode) +struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ee5f51ef696..d5f5efaf2b2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -161,7 +161,7 @@ struct nfs4_state { unsigned int n_rdonly; /* Number of read-only references */ unsigned int n_wronly; /* Number of write-only references */ unsigned int n_rdwr; /* Number of read/write references */ - int state; /* State on the server (R,W, or RW) */ + fmode_t state; /* State on the server (R,W, or RW) */ atomic_t count; }; @@ -223,9 +223,9 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); -extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t); -extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); +extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); +extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); +extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fc0c9d255cf..e8df52dc2bc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -323,7 +323,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) } static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, - struct nfs4_state_owner *sp, int flags, + struct nfs4_state_owner *sp, fmode_t fmode, int flags, const struct iattr *attrs) { struct dentry *parent = dget_parent(path->dentry); @@ -343,7 +343,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->owner = sp; atomic_inc(&sp->so_count); p->o_arg.fh = NFS_FH(dir); - p->o_arg.open_flags = flags, + p->o_arg.open_flags = flags; + p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id = sp->so_owner_id.id; p->o_arg.name = &p->path.dentry->d_name; @@ -399,10 +400,13 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) return ret; } -static int can_open_cached(struct nfs4_state *state, int mode) +static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) { int ret = 0; - switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { + + if (open_mode & O_EXCL) + goto out; + switch (mode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; break; @@ -412,12 +416,13 @@ static int can_open_cached(struct nfs4_state *state, int mode) case FMODE_READ|FMODE_WRITE: ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; } +out: return ret; } -static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) +static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) { - if ((delegation->type & open_flags) != open_flags) + if ((delegation->type & fmode) != fmode) return 0; if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) return 0; @@ -425,9 +430,9 @@ static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_fla return 1; } -static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) +static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) { - switch (open_flags) { + switch (fmode) { case FMODE_WRITE: state->n_wronly++; break; @@ -437,15 +442,15 @@ static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) case FMODE_READ|FMODE_WRITE: state->n_rdwr++; } - nfs4_state_set_mode_locked(state, state->state | open_flags); + nfs4_state_set_mode_locked(state, state->state | fmode); } -static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); - switch (open_flags) { + switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); break; @@ -457,14 +462,14 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * } } -static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { write_seqlock(&state->seqlock); - nfs_set_open_stateid_locked(state, stateid, open_flags); + nfs_set_open_stateid_locked(state, stateid, fmode); write_sequnlock(&state->seqlock); } -static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, int open_flags) +static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) { /* * Protect the call to nfs4_state_set_mode_locked and @@ -476,20 +481,20 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, open_flags); + nfs_set_open_stateid_locked(state, open_stateid, fmode); write_sequnlock(&state->seqlock); spin_lock(&state->owner->so_lock); - update_open_stateflags(state, open_flags); + update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } -static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, int open_flags) +static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode) { struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *deleg_cur; int ret = 0; - open_flags &= (FMODE_READ|FMODE_WRITE); + fmode &= (FMODE_READ|FMODE_WRITE); rcu_read_lock(); deleg_cur = rcu_dereference(nfsi->delegation); @@ -498,7 +503,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat spin_lock(&deleg_cur->lock); if (nfsi->delegation != deleg_cur || - (deleg_cur->type & open_flags) != open_flags) + (deleg_cur->type & fmode) != fmode) goto no_delegation_unlock; if (delegation == NULL) @@ -507,7 +512,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, open_flags); + __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); @@ -515,7 +520,7 @@ no_delegation: rcu_read_unlock(); if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, open_flags); + __update_open_stateid(state, open_stateid, NULL, fmode); ret = 1; } @@ -523,13 +528,13 @@ no_delegation: } -static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags) +static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode) { struct nfs_delegation *delegation; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation == NULL || (delegation->type & open_flags) == open_flags) { + if (delegation == NULL || (delegation->type & fmode) == fmode) { rcu_read_unlock(); return; } @@ -542,15 +547,16 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; - int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL); + int open_mode = opendata->o_arg.open_flags & O_EXCL; + fmode_t fmode = opendata->o_arg.fmode; nfs4_stateid stateid; int ret = -EAGAIN; for (;;) { - if (can_open_cached(state, open_mode)) { + if (can_open_cached(state, fmode, open_mode)) { spin_lock(&state->owner->so_lock); - if (can_open_cached(state, open_mode)) { - update_open_stateflags(state, open_mode); + if (can_open_cached(state, fmode, open_mode)) { + update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); goto out_return_state; } @@ -559,7 +565,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); if (delegation == NULL || - !can_open_delegated(delegation, open_mode)) { + !can_open_delegated(delegation, fmode)) { rcu_read_unlock(); break; } @@ -572,7 +578,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) ret = -EAGAIN; /* Try to update the stateid using the delegation */ - if (update_open_stateid(state, NULL, &stateid, open_mode)) + if (update_open_stateid(state, NULL, &stateid, fmode)) goto out_return_state; } out: @@ -624,7 +630,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data } update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.open_flags); + data->o_arg.fmode); iput(inode); out: return state; @@ -655,7 +661,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context { struct nfs4_opendata *opendata; - opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -663,12 +669,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context return opendata; } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) { struct nfs4_state *newstate; int ret; - opendata->o_arg.open_flags = openflags; + opendata->o_arg.open_flags = 0; + opendata->o_arg.fmode = fmode; memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -678,7 +685,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openf newstate = nfs4_opendata_to_nfs4_state(opendata); if (IS_ERR(newstate)) return PTR_ERR(newstate); - nfs4_close_state(&opendata->path, newstate, openflags); + nfs4_close_state(&opendata->path, newstate, fmode); *res = newstate; return 0; } @@ -734,7 +741,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state { struct nfs_delegation *delegation; struct nfs4_opendata *opendata; - int delegation_type = 0; + fmode_t delegation_type = 0; int status; opendata = nfs4_open_recoverdata_alloc(ctx, state); @@ -847,7 +854,7 @@ static void nfs4_open_confirm_release(void *calldata) goto out_free; state = nfs4_opendata_to_nfs4_state(data); if (!IS_ERR(state)) - nfs4_close_state(&data->path, state, data->o_arg.open_flags); + nfs4_close_state(&data->path, state, data->o_arg.fmode); out_free: nfs4_opendata_put(data); } @@ -911,7 +918,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (data->state != NULL) { struct nfs_delegation *delegation; - if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL))) + if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags)) goto out_no_action; rcu_read_lock(); delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); @@ -980,7 +987,7 @@ static void nfs4_open_release(void *calldata) goto out_free; state = nfs4_opendata_to_nfs4_state(data); if (!IS_ERR(state)) - nfs4_close_state(&data->path, state, data->o_arg.open_flags); + nfs4_close_state(&data->path, state, data->o_arg.fmode); out_free: nfs4_opendata_put(data); } @@ -1135,7 +1142,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct /* * Returns a referenced nfs4_state */ -static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; @@ -1153,9 +1160,9 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct if (status != 0) goto err_put_state_owner; if (path->dentry->d_inode != NULL) - nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); + nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); status = -ENOMEM; - opendata = nfs4_opendata_alloc(path, sp, flags, sattr); + opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr); if (opendata == NULL) goto err_put_state_owner; @@ -1187,14 +1194,14 @@ out_err: } -static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; int status; do { - status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); + status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -1332,7 +1339,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_EXPIRED: - if (calldata->arg.open_flags == 0) + if (calldata->arg.fmode == 0) break; default: if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { @@ -1374,10 +1381,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) nfs_fattr_init(calldata->res.fattr); if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.open_flags = FMODE_READ; + calldata->arg.fmode = FMODE_READ; } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.open_flags = FMODE_WRITE; + calldata->arg.fmode = FMODE_WRITE; } calldata->timestamp = jiffies; rpc_call_start(task); @@ -1430,7 +1437,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) goto out_free_calldata; - calldata->arg.open_flags = 0; + calldata->arg.fmode = 0; calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; @@ -1457,13 +1464,13 @@ out: return status; } -static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) +static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) { struct file *filp; int ret; /* If the open_intent is for execute, we have an extra check to make */ - if (nd->intent.open.flags & FMODE_EXEC) { + if (fmode & FMODE_EXEC) { ret = nfs_may_open(state->inode, state->owner->so_cred, nd->intent.open.flags); @@ -1479,7 +1486,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct } ret = PTR_ERR(filp); out_close: - nfs4_close_sync(path, state, nd->intent.open.flags); + nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE)); return ret; } @@ -1495,6 +1502,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; + fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -1512,7 +1520,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { @@ -1527,7 +1535,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) path.dentry = res; nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); nfs_unblock_sillyrename(parent); - nfs4_intent_set_file(nd, &path, state); + nfs4_intent_set_file(nd, &path, state, fmode); return res; } @@ -1540,11 +1548,12 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st }; struct rpc_cred *cred; struct nfs4_state *state; + fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE); cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); - state = nfs4_do_open(dir, &path, openflags, NULL, cred); + state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred); put_rpccred(cred); if (IS_ERR(state)) { switch (PTR_ERR(state)) { @@ -1561,10 +1570,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } if (state->inode == dentry->d_inode) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - nfs4_intent_set_file(nd, &path, state); + nfs4_intent_set_file(nd, &path, state, fmode); return 1; } - nfs4_close_sync(&path, state, openflags); + nfs4_close_sync(&path, state, fmode); out_drop: d_drop(dentry); return 0; @@ -1990,6 +1999,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, }; struct nfs4_state *state; struct rpc_cred *cred; + fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); int status = 0; cred = rpc_lookup_cred(); @@ -1997,7 +2007,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, status = PTR_ERR(cred); goto out; } - state = nfs4_do_open(dir, &path, flags, sattr, cred); + state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -2013,9 +2023,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_post_op_update_inode(state->inode, &fattr); } if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) - status = nfs4_intent_set_file(nd, &path, state); + status = nfs4_intent_set_file(nd, &path, state, fmode); else - nfs4_close_sync(&path, state, flags); + nfs4_close_sync(&path, state, fmode); out_putcred: put_rpccred(cred); out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cd16b301f13..2022fe47966 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -363,18 +363,18 @@ nfs4_alloc_open_state(void) } void -nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode) +nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode) { - if (state->state == mode) + if (state->state == fmode) return; /* NB! List reordering - see the reclaim code for why. */ - if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { - if (mode & FMODE_WRITE) + if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { + if (fmode & FMODE_WRITE) list_move(&state->open_states, &state->owner->so_states); else list_move_tail(&state->open_states, &state->owner->so_states); } - state->state = mode; + state->state = fmode; } static struct nfs4_state * @@ -454,16 +454,16 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait) +static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait) { struct nfs4_state_owner *owner = state->owner; int call_close = 0; - int newstate; + fmode_t newstate; atomic_inc(&owner->so_count); /* Protect against nfs4_find_state() */ spin_lock(&owner->so_lock); - switch (mode & (FMODE_READ | FMODE_WRITE)) { + switch (fmode & (FMODE_READ | FMODE_WRITE)) { case FMODE_READ: state->n_rdonly--; break; @@ -498,14 +498,14 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mod nfs4_do_close(path, state, wait); } -void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) +void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) { - __nfs4_close(path, state, mode, 0); + __nfs4_close(path, state, fmode, 0); } -void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode) +void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) { - __nfs4_close(path, state, mode, 1); + __nfs4_close(path, state, fmode, 1); } /* diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 879911c9903..d8ddfc5467d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -953,12 +953,12 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name) return 0; } -static void encode_share_access(struct xdr_stream *xdr, int open_flags) +static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) { __be32 *p; RESERVE_SPACE(8); - switch (open_flags & (FMODE_READ|FMODE_WRITE)) { + switch (fmode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: WRITE32(NFS4_SHARE_ACCESS_READ); break; @@ -969,7 +969,7 @@ static void encode_share_access(struct xdr_stream *xdr, int open_flags) WRITE32(NFS4_SHARE_ACCESS_BOTH); break; default: - BUG(); + WRITE32(0); } WRITE32(0); /* for linux, share_deny = 0 always */ } @@ -984,7 +984,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena RESERVE_SPACE(8); WRITE32(OP_OPEN); WRITE32(arg->seqid->sequence->counter); - encode_share_access(xdr, arg->open_flags); + encode_share_access(xdr, arg->fmode); RESERVE_SPACE(28); WRITE64(arg->clientid); WRITE32(16); @@ -1112,7 +1112,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea WRITE32(OP_OPEN_DOWNGRADE); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); - encode_share_access(xdr, arg->open_flags); + encode_share_access(xdr, arg->fmode); return 0; } -- cgit v1.2.3 From 64672d55d93c26fb4035fd1a84a803cbc09cb058 Mon Sep 17 00:00:00 2001 From: Peter Staubach Date: Tue, 23 Dec 2008 15:21:56 -0500 Subject: optimize attribute timeouts for "noac" and "actimeo=0" Hi. I've been looking at a bugzilla which describes a problem where a customer was advised to use either the "noac" or "actimeo=0" mount options to solve a consistency problem that they were seeing in the file attributes. It turned out that this solution did not work reliably for them because sometimes, the local attribute cache was believed to be valid and not timed out. (With an attribute cache timeout of 0, the cache should always appear to be timed out.) In looking at this situation, it appears to me that the problem is that the attribute cache timeout code has an off-by-one error in it. It is assuming that the cache is valid in the region, [read_cache_jiffies, read_cache_jiffies + attrtimeo]. The cache should be considered valid only in the region, [read_cache_jiffies, read_cache_jiffies + attrtimeo). With this change, the options, "noac" and "actimeo=0", work as originally expected. This problem was previously addressed by special casing the attrtimeo == 0 case. However, since the problem is only an off- by-one error, the cleaner solution is address the off-by-one error and thus, not require the special case. Thanx... ps Signed-off-by: Peter Staubach Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ed7024c3488..e35c8199f82 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1798,7 +1798,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str if (cache == NULL) goto out; if (!nfs_have_delegation(inode, FMODE_READ) && - !time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1cf39202c3e..0c381686171 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -712,14 +712,7 @@ int nfs_attribute_timeout(struct inode *inode) if (nfs_have_delegation(inode, FMODE_READ)) return 0; - /* - * Special case: if the attribute timeout is set to 0, then always - * treat the cache as having expired (unless holding - * a delegation). - */ - if (nfsi->attrtimeo == 0) - return 1; - return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } /** @@ -1182,7 +1175,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo_timestamp = now; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { - if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; -- cgit v1.2.3 From 027b6ca02192f381a5a91237ba8a8cf625dc6f6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Dec 2008 16:04:13 -0500 Subject: NFSv4: Fix an infinite loop in the NFS state recovery code Marten Gajda states: I tracked the problem down to the function nfs4_do_open_expired. Within this function _nfs4_open_expired is called and may return -NFS4ERR_DELAY. When a further call to _nfs4_open_expired is executed and does not return -NFS4ERR_DELAY the "exception.retry" variable is not reset to 0, causing the loop to iterate again (and as long as err != -NFS4ERR_DELAY, probably forever) Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e8df52dc2bc..49eebe25bba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1104,8 +1104,9 @@ static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4 do { err = _nfs4_open_expired(ctx, state); - if (err == -NFS4ERR_DELAY) - nfs4_handle_exception(server, err, &exception); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } -- cgit v1.2.3 From aadf61521199e5c0b2976002213819cafa41b897 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 23 Dec 2008 16:06:13 -0500 Subject: nfs: return compound hdr.status when there are no op replies When there are no op replies encoded in the compound reply hdr.status still contains the overall status of the compound rpc. This can happen, e.g., when the server returns a NFS4ERR_MINOR_VERS_MISMATCH error. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d8ddfc5467d..3f18a266a49 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2222,6 +2222,8 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) hdr->tag = (char *)p; p += XDR_QUADLEN(hdr->taglen); READ32(hdr->nops); + if (unlikely(hdr->nops < 1)) + return nfs4_stat_to_errno(hdr->status); return 0; } -- cgit v1.2.3 From 374130770efc80418b155b2966ff958495e03948 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 23 Dec 2008 16:06:14 -0500 Subject: nfs: remove incorrect usage of nfs4 compound response hdr.status 3 call sites look at hdr.status before returning success. hdr.status must be zero in this case so there's no point in this. Currently, hdr.status is correctly processed at decode_op_hdr time if the op status cannot be decoded. Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3f18a266a49..7eeed0ed589 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4423,8 +4423,6 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf status = decode_putfh(&xdr); if (!status) status = decode_fsinfo(&xdr, fsinfo); - if (!status) - status = nfs4_stat_to_errno(hdr.status); return status; } @@ -4513,8 +4511,6 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, status = decode_compound_hdr(&xdr, &hdr); if (!status) status = decode_setclientid(&xdr, clp); - if (!status) - status = nfs4_stat_to_errno(hdr.status); return status; } @@ -4535,8 +4531,6 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str status = decode_putrootfh(&xdr); if (!status) status = decode_fsinfo(&xdr, fsinfo); - if (!status) - status = nfs4_stat_to_errno(hdr.status); return status; } -- cgit v1.2.3 From 6c0195a4681c08335a33a483be801aaf0ed7f192 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 23 Dec 2008 16:06:15 -0500 Subject: NFS: remove white space from nfs4xdr.c Clean-up Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 62 ++++++++++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7eeed0ed589..7dde309ce1a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -8,7 +8,7 @@ * * Kendrick Smith * Andy Adamson - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -67,7 +67,7 @@ static int nfs4_stat_to_errno(int); #define NFS4_MAXTAGLEN 0 #endif -/* lock,open owner id: +/* lock,open owner id: * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ #define open_owner_id_maxsz (1 + 4) @@ -709,7 +709,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; WRITE32(NFS4_SET_TO_SERVER_TIME); } - + /* * Now we backfill the bitmap and the attribute buffer length. */ @@ -735,7 +735,7 @@ static int encode_access(struct xdr_stream *xdr, u32 access) RESERVE_SPACE(8); WRITE32(OP_ACCESS); WRITE32(access); - + return 0; } @@ -747,14 +747,14 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); - + return 0; } static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args) { __be32 *p; - + RESERVE_SPACE(16); WRITE32(OP_COMMIT); WRITE64(args->offset); @@ -766,7 +766,7 @@ static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *arg static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create) { __be32 *p; - + RESERVE_SPACE(8); WRITE32(OP_CREATE); WRITE32(create->ftype); @@ -856,7 +856,7 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name) WRITE32(OP_LINK); WRITE32(name->len); WRITEMEM(name->name, name->len); - + return 0; } @@ -1133,7 +1133,7 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) static int encode_putrootfh(struct xdr_stream *xdr) { __be32 *p; - + RESERVE_SPACE(4); WRITE32(OP_PUTROOTFH); @@ -1232,7 +1232,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con WRITE32(OP_RENAME); WRITE32(oldname->len); WRITEMEM(oldname->name, oldname->len); - + RESERVE_SPACE(4 + newname->len); WRITE32(newname->len); WRITEMEM(newname->name, newname->len); @@ -1296,7 +1296,7 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs * { int status; __be32 *p; - + RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); @@ -2217,7 +2217,7 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) READ_BUF(8); READ32(hdr->status); READ32(hdr->taglen); - + READ_BUF(hdr->taglen + 4); hdr->tag = (char *)p; p += XDR_QUADLEN(hdr->taglen); @@ -3049,8 +3049,7 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) { __be32 *savep; - uint32_t attrlen, - bitmap[2] = {0}; + uint32_t attrlen, bitmap[2] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -3072,14 +3071,13 @@ xdr_error: dprintk("%s: xdr returned %d!\n", __func__, -status); return status; } - + static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) { __be32 *savep; - uint32_t attrlen, - bitmap[2] = {0}; + uint32_t attrlen, bitmap[2] = {0}; int status; - + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto xdr_error; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) @@ -3109,10 +3107,9 @@ xdr_error: static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) { __be32 *savep; - uint32_t attrlen, - bitmap[2] = {0}; + uint32_t attrlen, bitmap[2] = {0}; int status; - + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto xdr_error; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) @@ -3258,7 +3255,7 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) { int status; - + status = decode_op_hdr(xdr, OP_LINK); if (status) return status; @@ -3564,7 +3561,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n dprintk("NFS: readdir reply truncated!\n"); entry[1] = 1; } -out: +out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: @@ -3720,7 +3717,6 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) uint32_t bmlen; int status; - status = decode_op_hdr(xdr, OP_SETATTR); if (status) return status; @@ -3740,7 +3736,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) READ32(opnum); if (opnum != OP_SETCLIENTID) { dprintk("nfs: decode_setclientid: Server returned operation" - " %d\n", opnum); + " %d\n", opnum); return -EIO; } READ32(nfserr); @@ -3829,7 +3825,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -3852,7 +3848,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -3875,7 +3871,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -3895,7 +3891,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -3916,7 +3912,7 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -3946,7 +3942,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -3979,7 +3975,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; @@ -4016,7 +4012,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g struct xdr_stream xdr; struct compound_hdr hdr; int status; - + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (status) -- cgit v1.2.3 From 05d564fe00c05bf8ff93948057ca1acb5bc68e10 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 23 Dec 2008 16:06:15 -0500 Subject: NFS: fix tabs in nfs4xdr.c Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 490 +++++++++++++++++++++++++++---------------------------- 1 file changed, 245 insertions(+), 245 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7dde309ce1a..29656c5090c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -755,12 +755,12 @@ static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *arg { __be32 *p; - RESERVE_SPACE(16); - WRITE32(OP_COMMIT); - WRITE64(args->offset); - WRITE32(args->count); + RESERVE_SPACE(16); + WRITE32(OP_COMMIT); + WRITE64(args->offset); + WRITE32(args->count); - return 0; + return 0; } static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create) @@ -797,25 +797,25 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap) { - __be32 *p; + __be32 *p; - RESERVE_SPACE(12); - WRITE32(OP_GETATTR); - WRITE32(1); - WRITE32(bitmap); - return 0; + RESERVE_SPACE(12); + WRITE32(OP_GETATTR); + WRITE32(1); + WRITE32(bitmap); + return 0; } static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1) { - __be32 *p; + __be32 *p; - RESERVE_SPACE(16); - WRITE32(OP_GETATTR); - WRITE32(2); - WRITE32(bm0); - WRITE32(bm1); - return 0; + RESERVE_SPACE(16); + WRITE32(OP_GETATTR); + WRITE32(2); + WRITE32(bm0); + WRITE32(bm1); + return 0; } static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) @@ -959,17 +959,17 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) RESERVE_SPACE(8); switch (fmode & (FMODE_READ|FMODE_WRITE)) { - case FMODE_READ: - WRITE32(NFS4_SHARE_ACCESS_READ); - break; - case FMODE_WRITE: - WRITE32(NFS4_SHARE_ACCESS_WRITE); - break; - case FMODE_READ|FMODE_WRITE: - WRITE32(NFS4_SHARE_ACCESS_BOTH); - break; - default: - WRITE32(0); + case FMODE_READ: + WRITE32(NFS4_SHARE_ACCESS_READ); + break; + case FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_WRITE); + break; + case FMODE_READ|FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_BOTH); + break; + default: + WRITE32(0); } WRITE32(0); /* for linux, share_deny = 0 always */ } @@ -998,13 +998,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op RESERVE_SPACE(4); switch(arg->open_flags & O_EXCL) { - case 0: - WRITE32(NFS4_CREATE_UNCHECKED); - encode_attrs(xdr, arg->u.attrs, arg->server); - break; - default: - WRITE32(NFS4_CREATE_EXCLUSIVE); - encode_nfs4_verifier(xdr, &arg->u.verifier); + case 0: + WRITE32(NFS4_CREATE_UNCHECKED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + default: + WRITE32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); } } @@ -1014,13 +1014,13 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a RESERVE_SPACE(4); switch (arg->open_flags & O_CREAT) { - case 0: - WRITE32(NFS4_OPEN_NOCREATE); - break; - default: - BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); - WRITE32(NFS4_OPEN_CREATE); - encode_createmode(xdr, arg); + case 0: + WRITE32(NFS4_OPEN_NOCREATE); + break; + default: + BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); + WRITE32(NFS4_OPEN_CREATE); + encode_createmode(xdr, arg); } } @@ -1030,17 +1030,17 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega RESERVE_SPACE(4); switch (delegation_type) { - case 0: - WRITE32(NFS4_OPEN_DELEGATE_NONE); - break; - case FMODE_READ: - WRITE32(NFS4_OPEN_DELEGATE_READ); - break; - case FMODE_WRITE|FMODE_READ: - WRITE32(NFS4_OPEN_DELEGATE_WRITE); - break; - default: - BUG(); + case 0: + WRITE32(NFS4_OPEN_DELEGATE_NONE); + break; + case FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_READ); + break; + case FMODE_WRITE|FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_WRITE); + break; + default: + BUG(); } } @@ -1077,17 +1077,17 @@ static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) encode_openhdr(xdr, arg); encode_opentype(xdr, arg); switch (arg->claim) { - case NFS4_OPEN_CLAIM_NULL: - encode_claim_null(xdr, arg->name); - break; - case NFS4_OPEN_CLAIM_PREVIOUS: - encode_claim_previous(xdr, arg->u.delegation_type); - break; - case NFS4_OPEN_CLAIM_DELEGATE_CUR: - encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); - break; - default: - BUG(); + case NFS4_OPEN_CLAIM_NULL: + encode_claim_null(xdr, arg->name); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + encode_claim_previous(xdr, arg->u.delegation_type); + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); + break; + default: + BUG(); } return 0; } @@ -1132,12 +1132,12 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) static int encode_putrootfh(struct xdr_stream *xdr) { - __be32 *p; + __be32 *p; - RESERVE_SPACE(4); - WRITE32(OP_PUTROOTFH); + RESERVE_SPACE(4); + WRITE32(OP_PUTROOTFH); - return 0; + return 0; } static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) @@ -1297,14 +1297,14 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs * int status; __be32 *p; - RESERVE_SPACE(4+NFS4_STATEID_SIZE); - WRITE32(OP_SETATTR); + RESERVE_SPACE(4+NFS4_STATEID_SIZE); + WRITE32(OP_SETATTR); WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); - if ((status = encode_attrs(xdr, arg->iap, server))) + if ((status = encode_attrs(xdr, arg->iap, server))) return status; - return 0; + return 0; } static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) @@ -1328,14 +1328,14 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) { - __be32 *p; + __be32 *p; - RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); - WRITE32(OP_SETCLIENTID_CONFIRM); - WRITE64(client_state->cl_clientid); - WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); + RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); + WRITE32(OP_SETCLIENTID_CONFIRM); + WRITE64(client_state->cl_clientid); + WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); - return 0; + return 0; } static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args) @@ -1584,23 +1584,23 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf */ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) { - struct xdr_stream xdr; - struct compound_hdr hdr = { - .nops = 3, - }; - int status; - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); - if(status) - goto out; - status = encode_close(&xdr, args); + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_close(&xdr, args); if (status != 0) goto out; status = encode_getfattr(&xdr, args->bitmask); out: - return status; + return status; } /* @@ -1875,25 +1875,24 @@ out: * Encode an SETATTR request */ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args) - { - struct xdr_stream xdr; - struct compound_hdr hdr = { - .nops = 3, - }; - int status; - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); - if(status) - goto out; - status = encode_setattr(&xdr, args, args->server); - if(status) - goto out; + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_setattr(&xdr, args, args->server); + if(status) + goto out; status = encode_getfattr(&xdr, args->bitmask); out: - return status; + return status; } /* @@ -3343,27 +3342,27 @@ static int decode_lookup(struct xdr_stream *xdr) /* This is too sick! */ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) { - __be32 *p; + __be32 *p; uint32_t limit_type, nblocks, blocksize; READ_BUF(12); READ32(limit_type); switch (limit_type) { - case 1: - READ64(*maxsize); - break; - case 2: - READ32(nblocks); - READ32(blocksize); - *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; + case 1: + READ64(*maxsize); + break; + case 2: + READ32(nblocks); + READ32(blocksize); + *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; } return 0; } static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) { - __be32 *p; - uint32_t delegation_type; + __be32 *p; + uint32_t delegation_type; READ_BUF(4); READ32(delegation_type); @@ -3374,13 +3373,14 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) READ_BUF(NFS4_STATEID_SIZE+4); COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); READ32(res->do_recall); + switch (delegation_type) { - case NFS4_OPEN_DELEGATE_READ: - res->delegation_type = FMODE_READ; - break; - case NFS4_OPEN_DELEGATE_WRITE: - res->delegation_type = FMODE_WRITE|FMODE_READ; - if (decode_space_limit(xdr, &res->maxsize) < 0) + case NFS4_OPEN_DELEGATE_READ: + res->delegation_type = FMODE_READ; + break; + case NFS4_OPEN_DELEGATE_WRITE: + res->delegation_type = FMODE_WRITE|FMODE_READ; + if (decode_space_limit(xdr, &res->maxsize) < 0) return -EIO; } return decode_ace(xdr, NULL, res->server->nfs_client); @@ -3388,27 +3388,27 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { - __be32 *p; + __be32 *p; uint32_t savewords, bmlen, i; - int status; + int status; - status = decode_op_hdr(xdr, OP_OPEN); + status = decode_op_hdr(xdr, OP_OPEN); if (status != -EIO) nfs_increment_open_seqid(status, res->seqid); - if (status) - return status; - READ_BUF(NFS4_STATEID_SIZE); - COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); + if (status) + return status; + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); - decode_change_info(xdr, &res->cinfo); + decode_change_info(xdr, &res->cinfo); - READ_BUF(8); - READ32(res->rflags); - READ32(bmlen); - if (bmlen > 10) - goto xdr_error; + READ_BUF(8); + READ32(res->rflags); + READ32(bmlen); + if (bmlen > 10) + goto xdr_error; - READ_BUF(bmlen << 2); + READ_BUF(bmlen << 2); savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); for (i = 0; i < savewords; ++i) READ32(res->attrset[i]); @@ -3423,17 +3423,17 @@ xdr_error: static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) { - __be32 *p; + __be32 *p; int status; - status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); if (status != -EIO) nfs_increment_open_seqid(status, res->seqid); - if (status) - return status; - READ_BUF(NFS4_STATEID_SIZE); - COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); - return 0; + if (status) + return status; + READ_BUF(NFS4_STATEID_SIZE); + COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); + return 0; } static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) @@ -3794,23 +3794,23 @@ static int decode_delegreturn(struct xdr_stream *xdr) */ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_open_downgrade(&xdr, res); + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); if (status != 0) goto out; decode_getfattr(&xdr, res->fattr, res->server); out: - return status; + return status; } /* @@ -4023,7 +4023,6 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g status = decode_getfattr(&xdr, res->fattr, res->server); out: return status; - } /* @@ -4032,21 +4031,22 @@ out: static int nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args) { - struct xdr_stream xdr; - struct compound_hdr hdr = { - .nops = 2, - }; - int status; - - xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); - if (status) - goto out; - status = encode_setacl(&xdr, args); + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); out: - return status; + return status; } + /* * Decode SETACL response */ @@ -4097,18 +4097,18 @@ out: */ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_close(&xdr, res); + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_close(&xdr, res); if (status != 0) goto out; /* @@ -4119,7 +4119,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos */ decode_getfattr(&xdr, res->fattr, res->server); out: - return status; + return status; } /* @@ -4127,23 +4127,23 @@ out: */ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_savefh(&xdr); + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_savefh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); if (status) goto out; - status = decode_open(&xdr, res); - if (status) - goto out; if (decode_getfh(&xdr, &res->fh) != 0) goto out; if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) @@ -4152,7 +4152,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr goto out; decode_getfattr(&xdr, res->dir_attr, res->server); out: - return status; + return status; } /* @@ -4160,20 +4160,20 @@ out: */ static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_open_confirm(&xdr, res); + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_confirm(&xdr, res); out: - return status; + return status; } /* @@ -4181,23 +4181,23 @@ out: */ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_open(&xdr, res); - if (status) - goto out; + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; decode_getfattr(&xdr, res->f_attr, res->server); out: - return status; + return status; } /* @@ -4205,25 +4205,25 @@ out: */ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res) { - struct xdr_stream xdr; - struct compound_hdr hdr; - int status; - - xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - status = decode_compound_hdr(&xdr, &hdr); - if (status) - goto out; - status = decode_putfh(&xdr); - if (status) - goto out; - status = decode_setattr(&xdr, res); - if (status) - goto out; + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); + if (status) + goto out; status = decode_getfattr(&xdr, res->fattr, res->server); if (status == NFS4ERR_DELAY) status = 0; out: - return status; + return status; } /* @@ -4707,7 +4707,7 @@ nfs4_stat_to_errno(int stat) .p_replen = NFS4_##restype##_sz, \ .p_statidx = NFSPROC4_CLNT_##proc, \ .p_name = #proc, \ - } +} struct rpc_procinfo nfs4_procedures[] = { PROC(READ, enc_read, dec_read), -- cgit v1.2.3 From 49c2559e29884fbb13fd273a108b213decd1d8a5 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 23 Dec 2008 16:06:16 -0500 Subject: NFS: fix comment placement in nfs4xdr.c Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 29656c5090c..273d0010060 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3789,6 +3789,10 @@ static int decode_delegreturn(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_DELEGRETURN); } +/* + * END OF "GENERIC" DECODE ROUTINES. + */ + /* * Decode OPEN_DOWNGRADE response */ @@ -3813,10 +3817,6 @@ out: return status; } -/* - * END OF "GENERIC" DECODE ROUTINES. - */ - /* * Decode ACCESS response */ -- cgit v1.2.3 From d017931cff77f2a1603c9c76e96477743abc9f41 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 23 Dec 2008 16:06:17 -0500 Subject: NFS: increment number of operations in each encode routine Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 417 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 247 insertions(+), 170 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 273d0010060..8f6c061bb6c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -541,6 +541,7 @@ static struct { struct compound_hdr { int32_t status; uint32_t nops; + __be32 * nops_p; uint32_t taglen; char * tag; }; @@ -578,7 +579,7 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char * xdr_encode_opaque(p, str, len); } -static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) +static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; @@ -588,8 +589,13 @@ static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) WRITE32(hdr->taglen); WRITEMEM(hdr->tag, hdr->taglen); WRITE32(NFS4_MINOR_VERSION); + hdr->nops_p = p; WRITE32(hdr->nops); - return 0; +} + +static void encode_nops(struct compound_hdr *hdr) +{ + *hdr->nops_p = htonl(hdr->nops); } static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) @@ -728,18 +734,19 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s return status; } -static int encode_access(struct xdr_stream *xdr, u32 access) +static int encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(8); WRITE32(OP_ACCESS); WRITE32(access); + hdr->nops++; return 0; } -static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) +static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -747,11 +754,12 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; return 0; } -static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args) +static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -759,11 +767,12 @@ static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *arg WRITE32(OP_COMMIT); WRITE64(args->offset); WRITE32(args->count); + hdr->nops++; return 0; } -static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create) +static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) { __be32 *p; @@ -791,11 +800,12 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c RESERVE_SPACE(4 + create->name->len); WRITE32(create->name->len); WRITEMEM(create->name->name, create->name->len); + hdr->nops++; return encode_attrs(xdr, create->attrs, create->server); } -static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap) +static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) { __be32 *p; @@ -803,10 +813,11 @@ static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap) WRITE32(OP_GETATTR); WRITE32(1); WRITE32(bitmap); + hdr->nops++; return 0; } -static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1) +static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) { __be32 *p; @@ -815,40 +826,43 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1 WRITE32(2); WRITE32(bm0); WRITE32(bm1); + hdr->nops++; return 0; } -static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) +static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1]); + bitmask[1] & nfs4_fattr_bitmap[1], hdr); } -static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) +static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], - bitmask[1] & nfs4_fsinfo_bitmap[1]); + bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); } -static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask) +static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { return encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0], - bitmask[1] & nfs4_fs_locations_bitmap[1]); + bitmask[1] & nfs4_fs_locations_bitmap[1], + hdr); } -static int encode_getfh(struct xdr_stream *xdr) +static int encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_GETFH); + hdr->nops++; return 0; } -static int encode_link(struct xdr_stream *xdr, const struct qstr *name) +static int encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { __be32 *p; @@ -856,6 +870,7 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name) WRITE32(OP_LINK); WRITE32(name->len); WRITEMEM(name->name, name->len); + hdr->nops++; return 0; } @@ -878,7 +893,7 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl) * opcode,type,reclaim,offset,length,new_lock_owner = 32 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 */ -static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) +static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -904,11 +919,12 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); WRITE32(args->lock_seqid->sequence->counter); } + hdr->nops++; return 0; } -static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args) +static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -921,11 +937,12 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg WRITE32(16); WRITEMEM("lock id:", 8); WRITE64(args->lock_owner.id); + hdr->nops++; return 0; } -static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args) +static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -936,11 +953,12 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); + hdr->nops++; return 0; } -static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name) +static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { int len = name->len; __be32 *p; @@ -949,6 +967,7 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name) WRITE32(OP_LOOKUP); WRITE32(len); WRITEMEM(name->name, len); + hdr->nops++; return 0; } @@ -1072,7 +1091,7 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc encode_string(xdr, name->len, name->name); } -static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) +static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { encode_openhdr(xdr, arg); encode_opentype(xdr, arg); @@ -1089,10 +1108,11 @@ static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) default: BUG(); } + hdr->nops++; return 0; } -static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) +static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -1100,11 +1120,12 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con WRITE32(OP_OPEN_CONFIRM); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); + hdr->nops++; return 0; } -static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg) +static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -1113,11 +1134,12 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); + hdr->nops++; return 0; } static int -encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) +encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) { int len = fh->size; __be32 *p; @@ -1126,16 +1148,18 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) WRITE32(OP_PUTFH); WRITE32(len); WRITEMEM(fh->data, len); + hdr->nops++; return 0; } -static int encode_putrootfh(struct xdr_stream *xdr) +static int encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_PUTROOTFH); + hdr->nops++; return 0; } @@ -1153,7 +1177,7 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); } -static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) +static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -1165,11 +1189,12 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) RESERVE_SPACE(12); WRITE64(args->offset); WRITE32(args->count); + hdr->nops++; return 0; } -static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) +static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { uint32_t attrs[2] = { FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, @@ -1191,6 +1216,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; WRITE32(attrs[0] & readdir->bitmask[0]); WRITE32(attrs[1] & readdir->bitmask[1]); + hdr->nops++; dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, (unsigned long long)readdir->cookie, @@ -1202,17 +1228,18 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg return 0; } -static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) +static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_READLINK); + hdr->nops++; return 0; } -static int encode_remove(struct xdr_stream *xdr, const struct qstr *name) +static int encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { __be32 *p; @@ -1220,11 +1247,12 @@ static int encode_remove(struct xdr_stream *xdr, const struct qstr *name) WRITE32(OP_REMOVE); WRITE32(name->len); WRITEMEM(name->name, name->len); + hdr->nops++; return 0; } -static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname) +static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) { __be32 *p; @@ -1236,34 +1264,37 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con RESERVE_SPACE(4 + newname->len); WRITE32(newname->len); WRITEMEM(newname->name, newname->len); + hdr->nops++; return 0; } -static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) +static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(12); WRITE32(OP_RENEW); WRITE64(client_stateid->cl_clientid); + hdr->nops++; return 0; } static int -encode_restorefh(struct xdr_stream *xdr) +encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_RESTOREFH); + hdr->nops++; return 0; } static int -encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -1278,21 +1309,23 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) RESERVE_SPACE(4); WRITE32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + hdr->nops++; return 0; } static int -encode_savefh(struct xdr_stream *xdr) +encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_SAVEFH); + hdr->nops++; return 0; } -static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server) +static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) { int status; __be32 *p; @@ -1300,6 +1333,7 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs * RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); + hdr->nops++; if ((status = encode_attrs(xdr, arg->iap, server))) return status; @@ -1307,7 +1341,7 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs * return 0; } -static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) +static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) { __be32 *p; @@ -1322,11 +1356,12 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); RESERVE_SPACE(4); WRITE32(setclientid->sc_cb_ident); + hdr->nops++; return 0; } -static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) +static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) { __be32 *p; @@ -1334,11 +1369,12 @@ static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_c WRITE32(OP_SETCLIENTID_CONFIRM); WRITE64(client_state->cl_clientid); WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); + hdr->nops++; return 0; } -static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args) +static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -1353,11 +1389,12 @@ static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args WRITE32(args->count); xdr_write_pages(xdr, args->pages, args->pgbase, args->count); + hdr->nops++; return 0; } -static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid) +static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) { __be32 *p; @@ -1365,6 +1402,7 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei WRITE32(OP_DELEGRETURN); WRITEMEM(stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; return 0; } @@ -1379,20 +1417,21 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status != 0) goto out; - status = encode_access(&xdr, args->access); + status = encode_access(&xdr, args->access, &hdr); if (status != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1403,20 +1442,21 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 4, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) goto out; - if ((status = encode_lookup(&xdr, args->name)) != 0) + if ((status = encode_lookup(&xdr, args->name, &hdr)) != 0) goto out; - if ((status = encode_getfh(&xdr)) != 0) + if ((status = encode_getfh(&xdr, &hdr)) != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1427,17 +1467,18 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putrootfh(&xdr)) != 0) + if ((status = encode_putrootfh(&xdr, &hdr)) != 0) goto out; - if ((status = encode_getfh(&xdr)) == 0) - status = encode_getfattr(&xdr, args->bitmask); + if ((status = encode_getfh(&xdr, &hdr)) == 0) + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1448,18 +1489,19 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) != 0) + if ((status = encode_putfh(&xdr, args->fh, &hdr)) != 0) goto out; - if ((status = encode_remove(&xdr, &args->name)) != 0) + if ((status = encode_remove(&xdr, &args->name, &hdr)) != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1470,26 +1512,28 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 7, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->old_dir)) != 0) + if ((status = encode_putfh(&xdr, args->old_dir, &hdr)) != 0) goto out; - if ((status = encode_savefh(&xdr)) != 0) + if ((status = encode_savefh(&xdr, &hdr)) != 0) goto out; - if ((status = encode_putfh(&xdr, args->new_dir)) != 0) + if ((status = encode_putfh(&xdr, args->new_dir, &hdr)) != 0) goto out; - if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0) + if ((status = encode_rename(&xdr, args->old_name, args->new_name, + &hdr)) != 0) goto out; - if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + if ((status = encode_getfattr(&xdr, args->bitmask, &hdr)) != 0) goto out; - if ((status = encode_restorefh(&xdr)) != 0) + if ((status = encode_restorefh(&xdr, &hdr)) != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1500,26 +1544,27 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 7, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) != 0) + if ((status = encode_putfh(&xdr, args->fh, &hdr)) != 0) goto out; - if ((status = encode_savefh(&xdr)) != 0) + if ((status = encode_savefh(&xdr, &hdr)) != 0) goto out; - if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) goto out; - if ((status = encode_link(&xdr, args->name)) != 0) + if ((status = encode_link(&xdr, args->name, &hdr)) != 0) goto out; - if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + if ((status = encode_getfattr(&xdr, args->bitmask, &hdr)) != 0) goto out; - if ((status = encode_restorefh(&xdr)) != 0) + if ((status = encode_restorefh(&xdr, &hdr)) != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1530,26 +1575,27 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 7, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) goto out; - if ((status = encode_savefh(&xdr)) != 0) + if ((status = encode_savefh(&xdr, &hdr)) != 0) goto out; - if ((status = encode_create(&xdr, args)) != 0) + if ((status = encode_create(&xdr, args, &hdr)) != 0) goto out; - if ((status = encode_getfh(&xdr)) != 0) + if ((status = encode_getfh(&xdr, &hdr)) != 0) goto out; - if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + if ((status = encode_getfattr(&xdr, args->bitmask, &hdr)) != 0) goto out; - if ((status = encode_restorefh(&xdr)) != 0) + if ((status = encode_restorefh(&xdr, &hdr)) != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1568,14 +1614,15 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) == 0) - status = encode_getfattr(&xdr, args->bitmask); + if ((status = encode_putfh(&xdr, args->fh, &hdr)) == 0) + status = encode_getfattr(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); return status; } @@ -1586,20 +1633,21 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_close(&xdr, args); + status = encode_close(&xdr, args, &hdr); if (status != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1610,32 +1658,33 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 7, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_savefh(&xdr); + status = encode_savefh(&xdr, &hdr); if (status) goto out; - status = encode_open(&xdr, args); + status = encode_open(&xdr, args, &hdr); if (status) goto out; - status = encode_getfh(&xdr); + status = encode_getfh(&xdr, &hdr); if (status) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); if (status) goto out; - status = encode_restorefh(&xdr); + status = encode_restorefh(&xdr, &hdr); if (status) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1646,17 +1695,18 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_open_confirm(&xdr, args); + status = encode_open_confirm(&xdr, args, &hdr); out: + encode_nops(&hdr); return status; } @@ -1667,20 +1717,21 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_open(&xdr, args); + status = encode_open(&xdr, args, &hdr); if (status) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1691,20 +1742,21 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_open_downgrade(&xdr, args); + status = encode_open_downgrade(&xdr, args, &hdr); if (status != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1715,17 +1767,18 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_lock(&xdr, args); + status = encode_lock(&xdr, args, &hdr); out: + encode_nops(&hdr); return status; } @@ -1736,17 +1789,18 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_lockt(&xdr, args); + status = encode_lockt(&xdr, args, &hdr); out: + encode_nops(&hdr); return status; } @@ -1757,17 +1811,18 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_locku(&xdr, args); + status = encode_locku(&xdr, args, &hdr); out: + encode_nops(&hdr); return status; } @@ -1778,7 +1833,7 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; @@ -1786,10 +1841,10 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_readlink(&xdr, args, req); + status = encode_readlink(&xdr, args, req, &hdr); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1800,6 +1855,7 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n args->pgbase, args->pglen); out: + encode_nops(&hdr); return status; } @@ -1810,7 +1866,7 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; @@ -1818,10 +1874,10 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_readdir(&xdr, args, req); + status = encode_readdir(&xdr, args, req, &hdr); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1835,6 +1891,7 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf args->pgbase, args->count); out: + encode_nops(&hdr); return status; } @@ -1846,16 +1903,16 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int replen, status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_read(&xdr, args); + status = encode_read(&xdr, args, &hdr); if (status) goto out; @@ -1868,6 +1925,7 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg args->pages, args->pgbase, args->count); req->rq_rcv_buf.flags |= XDRBUF_READ; out: + encode_nops(&hdr); return status; } @@ -1878,20 +1936,21 @@ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_seta { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if(status) goto out; - status = encode_setattr(&xdr, args, args->server); + status = encode_setattr(&xdr, args, args->server, &hdr); if(status) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1905,21 +1964,22 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct xdr_stream xdr; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int replen, status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); /* set up reply buffer: */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->acl_pages, args->acl_pgbase, args->acl_len); out: + encode_nops(&hdr); return status; } @@ -1930,21 +1990,22 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_write(&xdr, args); + status = encode_write(&xdr, args, &hdr); if (status) goto out; req->rq_snd_buf.flags |= XDRBUF_WRITE; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1955,20 +2016,21 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_commit(&xdr, args); + status = encode_commit(&xdr, args, &hdr); if (status) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -1979,15 +2041,16 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (!status) - status = encode_fsinfo(&xdr, args->bitmask); + status = encode_fsinfo(&xdr, args->bitmask, &hdr); + encode_nops(&hdr); return status; } @@ -1998,16 +2061,18 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (!status) status = encode_getattr_one(&xdr, - args->bitmask[0] & nfs4_pathconf_bitmap[0]); + args->bitmask[0] & nfs4_pathconf_bitmap[0], + &hdr); + encode_nops(&hdr); return status; } @@ -2018,17 +2083,18 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status == 0) status = encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], - args->bitmask[1] & nfs4_statfs_bitmap[1]); + args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); + encode_nops(&hdr); return status; } @@ -2039,18 +2105,19 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, fhandle); + status = encode_putfh(&xdr, fhandle, &hdr); if (status == 0) status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| - FATTR4_WORD0_ACLSUPPORT); + FATTR4_WORD0_ACLSUPPORT, &hdr); + encode_nops(&hdr); return status; } @@ -2061,12 +2128,15 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 1, + .nops = 0, }; + int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - return encode_renew(&xdr, clp); + status = encode_renew(&xdr, clp, &hdr); + encode_nops(&hdr); + return status; } /* @@ -2076,12 +2146,15 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4 { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 1, + .nops = 0, }; + int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - return encode_setclientid(&xdr, sc); + status = encode_setclientid(&xdr, sc, &hdr); + encode_nops(&hdr); + return status; } /* @@ -2091,18 +2164,19 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_setclientid_confirm(&xdr, clp); + status = encode_setclientid_confirm(&xdr, clp, &hdr); if (!status) - status = encode_putrootfh(&xdr); + status = encode_putrootfh(&xdr, &hdr); if (!status) - status = encode_fsinfo(&xdr, lease_bitmap); + status = encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_nops(&hdr); return status; } @@ -2113,20 +2187,21 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fhandle); + status = encode_putfh(&xdr, args->fhandle, &hdr); if (status != 0) goto out; - status = encode_delegreturn(&xdr, args->stateid); + status = encode_delegreturn(&xdr, args->stateid, &hdr); if (status != 0) goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_getfattr(&xdr, args->bitmask, &hdr); out: + encode_nops(&hdr); return status; } @@ -2137,7 +2212,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 0, }; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; @@ -2145,11 +2220,11 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) goto out; - if ((status = encode_lookup(&xdr, args->name)) != 0) + if ((status = encode_lookup(&xdr, args->name, &hdr)) != 0) goto out; - if ((status = encode_fs_locations(&xdr, args->bitmask)) != 0) + if ((status = encode_fs_locations(&xdr, args->bitmask, &hdr)) != 0) goto out; /* set up reply * toplevel_status + OP_PUTFH + status @@ -2159,6 +2234,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, 0, PAGE_SIZE); out: + encode_nops(&hdr); return status; } @@ -4033,17 +4109,18 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 0, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); + status = encode_putfh(&xdr, args->fh, &hdr); if (status) goto out; - status = encode_setacl(&xdr, args); + status = encode_setacl(&xdr, args, &hdr); out: + encode_nops(&hdr); return status; } -- cgit v1.2.3 From cf8cdbe5bd662eeaece96b017a4d6676ae416537 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 23 Dec 2008 16:06:18 -0500 Subject: NFS: remove unused status from encode routines Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 578 +++++++++++++++++-------------------------------------- 1 file changed, 180 insertions(+), 398 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8f6c061bb6c..d1e4c8f8a0a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -607,7 +607,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); } -static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) +static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) { char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; @@ -618,7 +618,6 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s int len; uint32_t bmval0 = 0; uint32_t bmval1 = 0; - int status; /* * We reserve enough space to write the entire attribute buffer at once. @@ -729,12 +728,10 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s *q++ = htonl(bmval1); *q++ = htonl(len); - status = 0; /* out: */ - return status; } -static int encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) +static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) { __be32 *p; @@ -742,11 +739,9 @@ static int encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr WRITE32(OP_ACCESS); WRITE32(access); hdr->nops++; - - return 0; } -static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) +static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -755,11 +750,9 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); hdr->nops++; - - return 0; } -static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -768,11 +761,9 @@ static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *arg WRITE64(args->offset); WRITE32(args->count); hdr->nops++; - - return 0; } -static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) +static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) { __be32 *p; @@ -802,10 +793,10 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c WRITEMEM(create->name->name, create->name->len); hdr->nops++; - return encode_attrs(xdr, create->attrs, create->server); + encode_attrs(xdr, create->attrs, create->server); } -static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) +static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) { __be32 *p; @@ -814,10 +805,9 @@ static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct co WRITE32(1); WRITE32(bitmap); hdr->nops++; - return 0; } -static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) +static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) { __be32 *p; @@ -827,42 +817,36 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1 WRITE32(bm0); WRITE32(bm1); hdr->nops++; - return 0; } -static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) +static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - return encode_getattr_two(xdr, - bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], hdr); + encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], + bitmask[1] & nfs4_fattr_bitmap[1], hdr); } -static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) +static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], - bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); + encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], + bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); } -static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) +static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - return encode_getattr_two(xdr, - bitmask[0] & nfs4_fs_locations_bitmap[0], - bitmask[1] & nfs4_fs_locations_bitmap[1], - hdr); + encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0], + bitmask[1] & nfs4_fs_locations_bitmap[1], hdr); } -static int encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) +static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_GETFH); hdr->nops++; - - return 0; } -static int encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) +static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { __be32 *p; @@ -871,8 +855,6 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name, struct c WRITE32(name->len); WRITEMEM(name->name, name->len); hdr->nops++; - - return 0; } static inline int nfs4_lock_type(struct file_lock *fl, int block) @@ -893,7 +875,7 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl) * opcode,type,reclaim,offset,length,new_lock_owner = 32 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 */ -static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, struct compound_hdr *hdr) +static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -920,11 +902,9 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, WRITE32(args->lock_seqid->sequence->counter); } hdr->nops++; - - return 0; } -static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) +static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -938,11 +918,9 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg WRITEMEM("lock id:", 8); WRITE64(args->lock_owner.id); hdr->nops++; - - return 0; } -static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) +static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -954,11 +932,9 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); hdr->nops++; - - return 0; } -static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) +static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { int len = name->len; __be32 *p; @@ -968,8 +944,6 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct WRITE32(len); WRITEMEM(name->name, len); hdr->nops++; - - return 0; } static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) @@ -1091,7 +1065,7 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc encode_string(xdr, name->len, name->name); } -static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) +static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { encode_openhdr(xdr, arg); encode_opentype(xdr, arg); @@ -1109,10 +1083,9 @@ static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, s BUG(); } hdr->nops++; - return 0; } -static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) +static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -1121,11 +1094,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); hdr->nops++; - - return 0; } -static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) +static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { __be32 *p; @@ -1135,10 +1106,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); hdr->nops++; - return 0; } -static int +static void encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) { int len = fh->size; @@ -1149,19 +1119,15 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd WRITE32(len); WRITEMEM(fh->data, len); hdr->nops++; - - return 0; } -static int encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) +static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_PUTROOTFH); hdr->nops++; - - return 0; } static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) @@ -1177,7 +1143,7 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); } -static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) +static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -1190,11 +1156,9 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, WRITE64(args->offset); WRITE32(args->count); hdr->nops++; - - return 0; } -static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) +static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { uint32_t attrs[2] = { FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, @@ -1224,22 +1188,18 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg ((u32 *)readdir->verifier.data)[1], attrs[0] & readdir->bitmask[0], attrs[1] & readdir->bitmask[1]); - - return 0; } -static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) +static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) { __be32 *p; RESERVE_SPACE(4); WRITE32(OP_READLINK); hdr->nops++; - - return 0; } -static int encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) +static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { __be32 *p; @@ -1248,11 +1208,9 @@ static int encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct WRITE32(name->len); WRITEMEM(name->name, name->len); hdr->nops++; - - return 0; } -static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) +static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) { __be32 *p; @@ -1265,11 +1223,9 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con WRITE32(newname->len); WRITEMEM(newname->name, newname->len); hdr->nops++; - - return 0; } -static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) +static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) { __be32 *p; @@ -1277,11 +1233,9 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_ WRITE32(OP_RENEW); WRITE64(client_stateid->cl_clientid); hdr->nops++; - - return 0; } -static int +static void encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; @@ -1289,8 +1243,6 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_RESTOREFH); hdr->nops++; - - return 0; } static int @@ -1313,7 +1265,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun return 0; } -static int +static void encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; @@ -1321,27 +1273,20 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_SAVEFH); hdr->nops++; - - return 0; } -static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) +static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) { - int status; __be32 *p; RESERVE_SPACE(4+NFS4_STATEID_SIZE); WRITE32(OP_SETATTR); WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); hdr->nops++; - - if ((status = encode_attrs(xdr, arg->iap, server))) - return status; - - return 0; + encode_attrs(xdr, arg->iap, server); } -static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) +static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) { __be32 *p; @@ -1357,11 +1302,9 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien RESERVE_SPACE(4); WRITE32(setclientid->sc_cb_ident); hdr->nops++; - - return 0; } -static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) +static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) { __be32 *p; @@ -1370,11 +1313,9 @@ static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_c WRITE64(client_state->cl_clientid); WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); hdr->nops++; - - return 0; } -static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -1390,11 +1331,9 @@ static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args xdr_write_pages(xdr, args->pages, args->pgbase, args->count); hdr->nops++; - - return 0; } -static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) +static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) { __be32 *p; @@ -1403,8 +1342,6 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei WRITE32(OP_DELEGRETURN); WRITEMEM(stateid->data, NFS4_STATEID_SIZE); hdr->nops++; - return 0; - } /* * END OF "GENERIC" ENCODE ROUTINES. @@ -1419,20 +1356,14 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status != 0) - goto out; - status = encode_access(&xdr, args->access, &hdr); - if (status != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_access(&xdr, args->access, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1444,20 +1375,15 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) - goto out; - if ((status = encode_lookup(&xdr, args->name, &hdr)) != 0) - goto out; - if ((status = encode_getfh(&xdr, &hdr)) != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_lookup(&xdr, args->name, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1469,17 +1395,14 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putrootfh(&xdr, &hdr)) != 0) - goto out; - if ((status = encode_getfh(&xdr, &hdr)) == 0) - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putrootfh(&xdr, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1491,18 +1414,14 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh, &hdr)) != 0) - goto out; - if ((status = encode_remove(&xdr, &args->name, &hdr)) != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_remove(&xdr, &args->name, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1514,27 +1433,18 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->old_dir, &hdr)) != 0) - goto out; - if ((status = encode_savefh(&xdr, &hdr)) != 0) - goto out; - if ((status = encode_putfh(&xdr, args->new_dir, &hdr)) != 0) - goto out; - if ((status = encode_rename(&xdr, args->old_name, args->new_name, - &hdr)) != 0) - goto out; - if ((status = encode_getfattr(&xdr, args->bitmask, &hdr)) != 0) - goto out; - if ((status = encode_restorefh(&xdr, &hdr)) != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->old_dir, &hdr); + encode_savefh(&xdr, &hdr); + encode_putfh(&xdr, args->new_dir, &hdr); + encode_rename(&xdr, args->old_name, args->new_name, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1546,26 +1456,18 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_ struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh, &hdr)) != 0) - goto out; - if ((status = encode_savefh(&xdr, &hdr)) != 0) - goto out; - if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) - goto out; - if ((status = encode_link(&xdr, args->name, &hdr)) != 0) - goto out; - if ((status = encode_getfattr(&xdr, args->bitmask, &hdr)) != 0) - goto out; - if ((status = encode_restorefh(&xdr, &hdr)) != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_savefh(&xdr, &hdr); + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_link(&xdr, args->name, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1577,26 +1479,18 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) - goto out; - if ((status = encode_savefh(&xdr, &hdr)) != 0) - goto out; - if ((status = encode_create(&xdr, args, &hdr)) != 0) - goto out; - if ((status = encode_getfh(&xdr, &hdr)) != 0) - goto out; - if ((status = encode_getfattr(&xdr, args->bitmask, &hdr)) != 0) - goto out; - if ((status = encode_restorefh(&xdr, &hdr)) != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_savefh(&xdr, &hdr); + encode_create(&xdr, args, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1616,14 +1510,13 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh, &hdr)) == 0) - status = encode_getfattr(&xdr, args->bitmask, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1635,20 +1528,14 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_close(&xdr, args, &hdr); - if (status != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_close(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1660,32 +1547,18 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_savefh(&xdr, &hdr); - if (status) - goto out; - status = encode_open(&xdr, args, &hdr); - if (status) - goto out; - status = encode_getfh(&xdr, &hdr); - if (status) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); - if (status) - goto out; - status = encode_restorefh(&xdr, &hdr); - if (status) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_savefh(&xdr, &hdr); + encode_open(&xdr, args, &hdr); + encode_getfh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); + encode_restorefh(&xdr, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1697,17 +1570,13 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_open_confirm(&xdr, args, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_open_confirm(&xdr, args, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1719,20 +1588,14 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_ struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_open(&xdr, args, &hdr); - if (status) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_open(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1744,20 +1607,14 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_open_downgrade(&xdr, args, &hdr); - if (status != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_open_downgrade(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1769,17 +1626,13 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_lock(&xdr, args, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_lock(&xdr, args, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1791,17 +1644,13 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_ struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_lockt(&xdr, args, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_lockt(&xdr, args, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1813,17 +1662,13 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_ struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_locku(&xdr, args, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_locku(&xdr, args, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1837,14 +1682,11 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n }; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; unsigned int replen; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_readlink(&xdr, args, req, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_readlink(&xdr, args, req, &hdr); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1853,10 +1695,8 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->pglen); - -out: encode_nops(&hdr); - return status; + return 0; } /* @@ -1870,14 +1710,11 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf }; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_readdir(&xdr, args, req, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_readdir(&xdr, args, req, &hdr); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1889,10 +1726,8 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", __func__, replen, args->pages, args->pgbase, args->count); - -out: encode_nops(&hdr); - return status; + return 0; } /* @@ -1905,16 +1740,12 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg struct compound_hdr hdr = { .nops = 0, }; - int replen, status; + int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_read(&xdr, args, &hdr); - if (status) - goto out; + encode_putfh(&xdr, args->fh, &hdr); + encode_read(&xdr, args, &hdr); /* set up reply kvec * toplevel status + taglen=0 + rescount + OP_PUTFH + status @@ -1924,9 +1755,8 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->count); req->rq_rcv_buf.flags |= XDRBUF_READ; -out: encode_nops(&hdr); - return status; + return 0; } /* @@ -1938,20 +1768,14 @@ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_seta struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if(status) - goto out; - status = encode_setattr(&xdr, args, args->server, &hdr); - if(status) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_setattr(&xdr, args, args->server, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -1966,21 +1790,19 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct compound_hdr hdr = { .nops = 0, }; - int replen, status; + int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); + /* set up reply buffer: */ replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->acl_pages, args->acl_pgbase, args->acl_len); -out: encode_nops(&hdr); - return status; + return 0; } /* @@ -1992,21 +1814,15 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_write(&xdr, args, &hdr); - if (status) - goto out; + encode_putfh(&xdr, args->fh, &hdr); + encode_write(&xdr, args, &hdr); req->rq_snd_buf.flags |= XDRBUF_WRITE; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2018,20 +1834,14 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; - status = encode_commit(&xdr, args, &hdr); - if (status) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fh, &hdr); + encode_commit(&xdr, args, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2043,15 +1853,13 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (!status) - status = encode_fsinfo(&xdr, args->bitmask, &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_fsinfo(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2063,17 +1871,14 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (!status) - status = encode_getattr_one(&xdr, - args->bitmask[0] & nfs4_pathconf_bitmap[0], - &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], + &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2085,17 +1890,14 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status == 0) - status = encode_getattr_two(&xdr, - args->bitmask[0] & nfs4_statfs_bitmap[0], - args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); + encode_putfh(&xdr, args->fh, &hdr); + encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], + args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2107,18 +1909,16 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struc struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, fhandle, &hdr); - if (status == 0) - status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| - FATTR4_WORD0_LINK_SUPPORT| - FATTR4_WORD0_SYMLINK_SUPPORT| - FATTR4_WORD0_ACLSUPPORT, &hdr); + encode_putfh(&xdr, fhandle, &hdr); + encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| + FATTR4_WORD0_LINK_SUPPORT| + FATTR4_WORD0_SYMLINK_SUPPORT| + FATTR4_WORD0_ACLSUPPORT, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2130,13 +1930,12 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_renew(&xdr, clp, &hdr); + encode_renew(&xdr, clp, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2148,13 +1947,12 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4 struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_setclientid(&xdr, sc, &hdr); + encode_setclientid(&xdr, sc, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2167,17 +1965,14 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str .nops = 0, }; const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_setclientid_confirm(&xdr, clp, &hdr); - if (!status) - status = encode_putrootfh(&xdr, &hdr); - if (!status) - status = encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_setclientid_confirm(&xdr, clp, &hdr); + encode_putrootfh(&xdr, &hdr); + encode_fsinfo(&xdr, lease_bitmap, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2189,20 +1984,14 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc struct compound_hdr hdr = { .nops = 0, }; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fhandle, &hdr); - if (status != 0) - goto out; - status = encode_delegreturn(&xdr, args->stateid, &hdr); - if (status != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask, &hdr); -out: + encode_putfh(&xdr, args->fhandle, &hdr); + encode_delegreturn(&xdr, args->stateid, &hdr); + encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); - return status; + return 0; } /* @@ -2216,16 +2005,13 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs }; struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; int replen; - int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->dir_fh, &hdr)) != 0) - goto out; - if ((status = encode_lookup(&xdr, args->name, &hdr)) != 0) - goto out; - if ((status = encode_fs_locations(&xdr, args->bitmask, &hdr)) != 0) - goto out; + encode_putfh(&xdr, args->dir_fh, &hdr); + encode_lookup(&xdr, args->name, &hdr); + encode_fs_locations(&xdr, args->bitmask, &hdr); + /* set up reply * toplevel_status + OP_PUTFH + status * + OP_LOOKUP + status + OP_GETATTR + status = 7 @@ -2233,9 +2019,8 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, 0, PAGE_SIZE); -out: encode_nops(&hdr); - return status; + return 0; } /* @@ -4115,11 +3900,8 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh, &hdr); - if (status) - goto out; + encode_putfh(&xdr, args->fh, &hdr); status = encode_setacl(&xdr, args, &hdr); -out: encode_nops(&hdr); return status; } -- cgit v1.2.3 From 68e76ad0baf8f5d5060377c2423ee6eed5c63057 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:17:15 -0500 Subject: nfsd: pass client principal name in rsc downcall Two principals are involved in krb5 authentication: the target, who we authenticate *to* (normally the name of the server, like nfs/server.citi.umich.edu@CITI.UMICH.EDU), and the source, we we authenticate *as* (normally a user, like bfields@UMICH.EDU) In the case of NFSv4 callbacks, the target of the callback should be the source of the client's setclientid call, and the source should be the nfs server's own principal. Therefore we allow svcgssd to pass down the name of the principal that just authenticated, so that on setclientid we can store that principal name with the new client, to be used later on callbacks. Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4state.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1a052ac2bde..f3b9a8d064f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -54,6 +54,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -377,6 +378,7 @@ free_client(struct nfs4_client *clp) shutdown_callback_client(clp); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); + kfree(clp->cl_principal); kfree(clp->cl_name.data); kfree(clp); } @@ -696,6 +698,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; __be32 status; + char *princ; char dname[HEXDIR_LEN]; if (!check_name(clname)) @@ -783,6 +786,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_verf(new, &clverifier); new->cl_addr = sin->sin_addr.s_addr; + princ = svc_gss_principal(rqstp); + if (princ) { + new->cl_principal = kstrdup(princ, GFP_KERNEL); + if (new->cl_principal == NULL) { + free_client(new); + goto out; + } + } copy_cred(&new->cl_cred, &rqstp->rq_cred); gen_confirm(new); gen_callback(new, setclid); -- cgit v1.2.3 From 608207e8884e083ad8b8d33eda868da70f0d63e8 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:17:40 -0500 Subject: rpc: pass target name down to rpc level on callbacks The rpc client needs to know the principal that the setclientid was done as, so it can tell gssd who to authenticate to. Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4callback.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 094747a1227..3ca14178214 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -384,6 +384,7 @@ static int do_probe_callback(void *data) .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), + .client_name = clp->cl_principal, }; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], @@ -392,6 +393,11 @@ static int do_probe_callback(void *data) struct rpc_clnt *client; int status; + if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) { + status = nfserr_cb_path_down; + goto out_err; + } + /* Initialize address */ memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; -- cgit v1.2.3 From 945b34a7725a5f0741de7775132aafc58bfecfbb Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:18:34 -0500 Subject: rpc: allow gss callbacks to client This patch adds client-side support to allow for callbacks other than AUTH_SYS. Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index c2e9cfd9e5a..3e634f2a108 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -182,10 +183,34 @@ void nfs_callback_down(void) mutex_unlock(&nfs_callback_mutex); } +static int check_gss_callback_principal(struct nfs_client *clp, + struct svc_rqst *rqstp) +{ + struct rpc_clnt *r = clp->cl_rpcclient; + char *p = svc_gss_principal(rqstp); + + /* + * It might just be a normal user principal, in which case + * userspace won't bother to tell us the name at all. + */ + if (p == NULL) + return SVC_DENIED; + + /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ + + if (memcmp(p, "nfs@", 4) != 0) + return SVC_DENIED; + p += 4; + if (strcmp(p, r->cl_server) != 0) + return SVC_DENIED; + return SVC_OK; +} + static int nfs_callback_authenticate(struct svc_rqst *rqstp) { struct nfs_client *clp; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); + int ret = SVC_OK; /* Don't talk to strangers */ clp = nfs_find_client(svc_addr(rqstp), 4); @@ -194,21 +219,22 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) dprintk("%s: %s NFSv4 callback!\n", __func__, svc_print_addr(rqstp, buf, sizeof(buf))); - nfs_put_client(clp); switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) - return SVC_DENIED; + ret = SVC_DENIED; break; case RPC_AUTH_UNIX: break; case RPC_AUTH_GSS: - /* FIXME: RPCSEC_GSS handling? */ + ret = check_gss_callback_principal(clp, rqstp); + break; default: - return SVC_DENIED; + ret = SVC_DENIED; } - return SVC_OK; + nfs_put_client(clp); + return ret; } /* -- cgit v1.2.3 From 61054b14d545e257b9415d5ca0cd5f43762b4d0c Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 23 Dec 2008 16:19:00 -0500 Subject: nfsd: support callbacks with gss flavors This patch adds server-side support for callbacks other than AUTH_SYS. Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4callback.c | 3 ++- fs/nfsd/nfs4state.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3ca14178214..6d7d8c02c19 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -358,6 +358,7 @@ static struct rpc_program cb_program = { .nrvers = ARRAY_SIZE(nfs_cb_version), .version = nfs_cb_version, .stats = &cb_stats, + .pipe_dir_name = "/nfsd4_cb", }; /* Reference counting, callback cleanup, etc., all look racy as heck. @@ -382,7 +383,7 @@ static int do_probe_callback(void *data) .program = &cb_program, .prognumber = cb->cb_prog, .version = nfs_cb_version[1]->number, - .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ + .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), .client_name = clp->cl_principal, }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f3b9a8d064f..07db31568ac 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -786,6 +786,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_verf(new, &clverifier); new->cl_addr = sin->sin_addr.s_addr; + new->cl_flavor = rqstp->rq_flavor; princ = svc_gss_principal(rqstp); if (princ) { new->cl_principal = kstrdup(princ, GFP_KERNEL); -- cgit v1.2.3 From 25051158bbed127e8672b43396c71c5eb610e5f1 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Wed, 24 Dec 2008 14:07:32 +1100 Subject: [XFS] Fix race in xfs_write() between direct and buffered I/O with DMAPI The iolock is dropped and re-acquired around the call to XFS_SEND_NAMESP(). While the iolock is released the file can become cached. We then 'goto retry' and - if we are doing direct I/O - mapping->nrpages may now be non zero but need_i_mutex will be zero and we will hit the WARN_ON(). Since we have dropped the I/O lock then the file size may have also changed so what we need to do here is 'goto start' like we do for the XFS_SEND_DATA() DMAPI event. We also need to update the filesize before releasing the iolock so that needs to be done before the XFS_SEND_NAMESP event. If we drop the iolock before setting the filesize we could race with a truncate. Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_lrw.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 92ce34b3787..7e90daa0d1d 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -707,7 +707,6 @@ start: } } -retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -763,6 +762,17 @@ retry: if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) + *offset = isize; + + if (*offset > xip->i_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + if (*offset > xip->i_size) + xip->i_size = *offset; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + if (ret == -ENOSPC && DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_iunlock(xip, iolock); @@ -776,20 +786,7 @@ retry: xfs_ilock(xip, iolock); if (error) goto out_unlock_internal; - pos = xip->i_size; - ret = 0; - goto retry; - } - - isize = i_size_read(inode); - if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) - *offset = isize; - - if (*offset > xip->i_size) { - xfs_ilock(xip, XFS_ILOCK_EXCL); - if (*offset > xip->i_size) - xip->i_size = *offset; - xfs_iunlock(xip, XFS_ILOCK_EXCL); + goto start; } error = -ret; -- cgit v1.2.3 From 08bafc0341f2f7920e9045bc32c40299cac8c21b Mon Sep 17 00:00:00 2001 From: Keith Mannthey Date: Tue, 25 Nov 2008 10:24:35 +0100 Subject: block: Supress Buffer I/O errors when SCSI REQ_QUIET flag set Allow the scsi request REQ_QUIET flag to be propagated to the buffer file system layer. The basic ideas is to pass the flag from the scsi request to the bio (block IO) and then to the buffer layer. The buffer layer can then suppress needless printks. This patch declutters the kernel log by removed the 40-50 (per lun) buffer io error messages seen during a boot in my multipath setup . It is a good chance any real errors will be missed in the "noise" it the logs without this patch. During boot I see blocks of messages like " __ratelimit: 211 callbacks suppressed Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242847 Buffer I/O error on device sdm, logical block 1 Buffer I/O error on device sdm, logical block 5242878 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242879 Buffer I/O error on device sdm, logical block 5242872 " in my logs. My disk environment is multipath fiber channel using the SCSI_DH_RDAC code and multipathd. This topology includes an "active" and "ghost" path for each lun. IO's to the "ghost" path will never complete and the SCSI layer, via the scsi device handler rdac code, quick returns the IOs to theses paths and sets the REQ_QUIET scsi flag to suppress the scsi layer messages. I am wanting to extend the QUIET behavior to include the buffer file system layer to deal with these errors as well. I have been running this patch for a while now on several boxes without issue. A few runs of bonnie++ show no noticeable difference in performance in my setup. Thanks for John Stultz for the quiet_error finalization. Submitted-by: Keith Mannthey Signed-off-by: Jens Axboe --- fs/buffer.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 10179cfa115..776ae091d3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page) page_cache_release(page); } + +static int quiet_error(struct buffer_head *bh) +{ + if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit()) + return 0; + return 1; +} + + static void buffer_io_error(struct buffer_head *bh) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); @@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { + if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) set_buffer_uptodate(bh); } else { clear_buffer_uptodate(bh); - if (printk_ratelimit()) + if (!quiet_error(bh)) buffer_io_error(bh); SetPageError(page); } @@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (printk_ratelimit()) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) set_bit(BH_Eopnotsupp, &bh->b_state); } + if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) + set_bit(BH_Quiet, &bh->b_state); + bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); bio_put(bio); } -- cgit v1.2.3 From 7ff9345ffac56743b5001561bc2dc1e041b79149 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 11 Dec 2008 11:53:43 +0100 Subject: bio: only mempool back the largest bio_vec slab cache We only very rarely need the mempool backing, so it makes sense to get rid of all but one of the mempool in a bio_set. So keep the largest bio_vec count mempool so we can always honor the largest allocation, and "upgrade" callers that fail. Signed-off-by: Jens Axboe --- fs/bio.c | 125 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 72 insertions(+), 53 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index df99c882b80..eb6b4683a26 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -58,7 +58,8 @@ unsigned int bvec_nr_vecs(unsigned short idx) return bvec_slabs[idx].nr_vecs; } -struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) +struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, + struct bio_set *bs) { struct bio_vec *bvl; @@ -67,60 +68,90 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct * If not, this is a bio_kmalloc() allocation and just do a * kzalloc() for the exact number of vecs right away. */ - if (bs) { + if (!bs) + bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); + + /* + * see comment near bvec_array define! + */ + switch (nr) { + case 1: + *idx = 0; + break; + case 2 ... 4: + *idx = 1; + break; + case 5 ... 16: + *idx = 2; + break; + case 17 ... 64: + *idx = 3; + break; + case 65 ... 128: + *idx = 4; + break; + case 129 ... BIO_MAX_PAGES: + *idx = 5; + break; + default: + return NULL; + } + + /* + * idx now points to the pool we want to allocate from. only the + * 1-vec entry pool is mempool backed. + */ + if (*idx == BIOVEC_MAX_IDX) { +fallback: + bvl = mempool_alloc(bs->bvec_pool, gfp_mask); + } else { + struct biovec_slab *bvs = bvec_slabs + *idx; + gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO); + /* - * see comment near bvec_array define! + * Make this allocation restricted and don't dump info on + * allocation failures, since we'll fallback to the mempool + * in case of failure. */ - switch (nr) { - case 1: - *idx = 0; - break; - case 2 ... 4: - *idx = 1; - break; - case 5 ... 16: - *idx = 2; - break; - case 17 ... 64: - *idx = 3; - break; - case 65 ... 128: - *idx = 4; - break; - case 129 ... BIO_MAX_PAGES: - *idx = 5; - break; - default: - return NULL; - } + __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; /* - * idx now points to the pool we want to allocate from + * Try a slab allocation. If this fails and __GFP_WAIT + * is set, retry with the 1-entry mempool */ - bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) - memset(bvl, 0, - bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); - } else - bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); + bvl = kmem_cache_alloc(bvs->slab, __gfp_mask); + if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) { + *idx = BIOVEC_MAX_IDX; + goto fallback; + } + } + + if (bvl) + memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); return bvl; } -void bio_free(struct bio *bio, struct bio_set *bio_set) +void bio_free(struct bio *bio, struct bio_set *bs) { if (bio->bi_io_vec) { const int pool_idx = BIO_POOL_IDX(bio); BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); - mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); + if (pool_idx == BIOVEC_MAX_IDX) + mempool_free(bio->bi_io_vec, bs->bvec_pool); + else { + struct biovec_slab *bvs = bvec_slabs + pool_idx; + + kmem_cache_free(bvs->slab, bio->bi_io_vec); + } } if (bio_integrity(bio)) - bio_integrity_free(bio, bio_set); + bio_integrity_free(bio, bs); - mempool_free(bio, bio_set->bio_pool); + mempool_free(bio, bs->bio_pool); } /* @@ -1346,30 +1377,18 @@ EXPORT_SYMBOL(bio_sector_offset); */ static int biovec_create_pools(struct bio_set *bs, int pool_entries) { - int i; + struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; - for (i = 0; i < BIOVEC_NR_POOLS; i++) { - struct biovec_slab *bp = bvec_slabs + i; - mempool_t **bvp = bs->bvec_pools + i; + bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab); + if (!bs->bvec_pool) + return -ENOMEM; - *bvp = mempool_create_slab_pool(pool_entries, bp->slab); - if (!*bvp) - return -ENOMEM; - } return 0; } static void biovec_free_pools(struct bio_set *bs) { - int i; - - for (i = 0; i < BIOVEC_NR_POOLS; i++) { - mempool_t *bvp = bs->bvec_pools[i]; - - if (bvp) - mempool_destroy(bvp); - } - + mempool_destroy(bs->bvec_pool); } void bioset_free(struct bio_set *bs) -- cgit v1.2.3 From 1b4344986926da324b5cd10b683e5a1a5e1b7db3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Oct 2008 20:32:58 +0200 Subject: bio: move the slab pointer inside the bio_set In preparation for adding differently sized bios. Signed-off-by: Jens Axboe --- fs/bio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index eb6b4683a26..1ab8986b041 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1404,12 +1404,15 @@ void bioset_free(struct bio_set *bs) struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) { - struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL); + struct bio_set *bs; + bs = kzalloc(sizeof(*bs), GFP_KERNEL); if (!bs) return NULL; - bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab); + bs->bio_slab = bio_slab; + + bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bs->bio_slab); if (!bs->bio_pool) goto bad; -- cgit v1.2.3 From bb799ca0202a360fa74d5f17039b9100caebdde7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Dec 2008 15:35:05 +0100 Subject: bio: allow individual slabs in the bio_set Instead of having a global bio slab cache, add a reference to one in each bio_set that is created. This allows for personalized slabs in each bio_set, so that they can have bios of different sizes. This means we can personalize the bios we return. File systems may want to embed the bio inside another structure, to avoid allocation more items (and stuffing them in ->bi_private) after the get a bio. Or we may want to embed a number of bio_vecs directly at the end of a bio, to avoid doing two allocations to return a bio. This is now possible. Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 2 +- fs/bio.c | 191 ++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 162 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 19caf7c962a..77ebc3c263d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) && bip->bip_buf != NULL) kfree(bip->bip_buf); - mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); + bvec_free_bs(bs, bip->bip_vec, bip->bip_pool); mempool_free(bip, bs->bio_integrity_pool); bio->bi_integrity = NULL; diff --git a/fs/bio.c b/fs/bio.c index 1ab8986b041..0146f80789e 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -31,8 +31,6 @@ DEFINE_TRACE(block_split); -static struct kmem_cache *bio_slab __read_mostly; - static mempool_t *bio_split_pool __read_mostly; /* @@ -40,9 +38,8 @@ static mempool_t *bio_split_pool __read_mostly; * break badly! cannot be bigger than what you can fit into an * unsigned short */ - #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } -static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { +struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), }; #undef BV @@ -53,11 +50,119 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { */ struct bio_set *fs_bio_set; +/* + * Our slab pool management + */ +struct bio_slab { + struct kmem_cache *slab; + unsigned int slab_ref; + unsigned int slab_size; + char name[8]; +}; +static DEFINE_MUTEX(bio_slab_lock); +static struct bio_slab *bio_slabs; +static unsigned int bio_slab_nr, bio_slab_max; + +static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) +{ + unsigned int sz = sizeof(struct bio) + extra_size; + struct kmem_cache *slab = NULL; + struct bio_slab *bslab; + unsigned int i, entry = -1; + + mutex_lock(&bio_slab_lock); + + i = 0; + while (i < bio_slab_nr) { + struct bio_slab *bslab = &bio_slabs[i]; + + if (!bslab->slab && entry == -1) + entry = i; + else if (bslab->slab_size == sz) { + slab = bslab->slab; + bslab->slab_ref++; + break; + } + i++; + } + + if (slab) + goto out_unlock; + + if (bio_slab_nr == bio_slab_max && entry == -1) { + bio_slab_max <<= 1; + bio_slabs = krealloc(bio_slabs, + bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!bio_slabs) + goto out_unlock; + } + if (entry == -1) + entry = bio_slab_nr++; + + bslab = &bio_slabs[entry]; + + snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); + slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); + if (!slab) + goto out_unlock; + + printk("bio: create slab <%s> at %d\n", bslab->name, entry); + bslab->slab = slab; + bslab->slab_ref = 1; + bslab->slab_size = sz; +out_unlock: + mutex_unlock(&bio_slab_lock); + return slab; +} + +static void bio_put_slab(struct bio_set *bs) +{ + struct bio_slab *bslab = NULL; + unsigned int i; + + mutex_lock(&bio_slab_lock); + + for (i = 0; i < bio_slab_nr; i++) { + if (bs->bio_slab == bio_slabs[i].slab) { + bslab = &bio_slabs[i]; + break; + } + } + + if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n")) + goto out; + + WARN_ON(!bslab->slab_ref); + + if (--bslab->slab_ref) + goto out; + + kmem_cache_destroy(bslab->slab); + bslab->slab = NULL; + +out: + mutex_unlock(&bio_slab_lock); +} + unsigned int bvec_nr_vecs(unsigned short idx) { return bvec_slabs[idx].nr_vecs; } +void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx) +{ + BIO_BUG_ON(idx >= BIOVEC_NR_POOLS); + + if (idx == BIOVEC_MAX_IDX) + mempool_free(bv, bs->bvec_pool); + else { + struct biovec_slab *bvs = bvec_slabs + idx; + + kmem_cache_free(bvs->slab, bv); + } +} + struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) { @@ -134,24 +239,22 @@ fallback: void bio_free(struct bio *bio, struct bio_set *bs) { - if (bio->bi_io_vec) { - const int pool_idx = BIO_POOL_IDX(bio); + void *p; - BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); - - if (pool_idx == BIOVEC_MAX_IDX) - mempool_free(bio->bi_io_vec, bs->bvec_pool); - else { - struct biovec_slab *bvs = bvec_slabs + pool_idx; - - kmem_cache_free(bvs->slab, bio->bi_io_vec); - } - } + if (bio->bi_io_vec) + bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) bio_integrity_free(bio, bs); - mempool_free(bio, bs->bio_pool); + /* + * If we have front padding, adjust the bio pointer before freeing + */ + p = bio; + if (bs->front_pad) + p -= bs->front_pad; + + mempool_free(p, bs->bio_pool); } /* @@ -188,16 +291,20 @@ void bio_init(struct bio *bio) * for a &struct bio to become free. If a %NULL @bs is passed in, we will * fall back to just using @kmalloc to allocate the required memory. * - * allocate bio and iovecs from the memory pools specified by the - * bio_set structure, or @kmalloc if none given. + * Note that the caller must set ->bi_destructor on succesful return + * of a bio, to do the appropriate freeing of the bio once the reference + * count drops to zero. **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { - struct bio *bio; + struct bio *bio = NULL; + + if (bs) { + void *p = mempool_alloc(bs->bio_pool, gfp_mask); - if (bs) - bio = mempool_alloc(bs->bio_pool, gfp_mask); - else + if (p) + bio = p + bs->front_pad; + } else bio = kmalloc(sizeof(*bio), gfp_mask); if (likely(bio)) { @@ -1398,11 +1505,25 @@ void bioset_free(struct bio_set *bs) bioset_integrity_free(bs); biovec_free_pools(bs); + bio_put_slab(bs); kfree(bs); } -struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) +/** + * bioset_create - Create a bio_set + * @pool_size: Number of bio and bio_vecs to cache in the mempool + * @front_pad: Number of bytes to allocate in front of the returned bio + * + * Description: + * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller + * to ask for a number of bytes to be allocated in front of the bio. + * Front pad allocation is useful for embedding the bio inside + * another structure, to avoid allocating extra data to go with the bio. + * Note that the bio must be embedded at the END of that structure always, + * or things will break badly. + */ +struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) { struct bio_set *bs; @@ -1410,16 +1531,22 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) if (!bs) return NULL; - bs->bio_slab = bio_slab; + bs->front_pad = front_pad; - bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bs->bio_slab); + bs->bio_slab = bio_find_or_create_slab(front_pad); + if (!bs->bio_slab) { + kfree(bs); + return NULL; + } + + bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab); if (!bs->bio_pool) goto bad; - if (bioset_integrity_create(bs, bio_pool_size)) + if (bioset_integrity_create(bs, pool_size)) goto bad; - if (!biovec_create_pools(bs, bvec_pool_size)) + if (!biovec_create_pools(bs, pool_size)) return bs; bad: @@ -1443,12 +1570,16 @@ static void __init biovec_init_slabs(void) static int __init init_bio(void) { - bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); + bio_slab_max = 2; + bio_slab_nr = 0; + bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL); + if (!bio_slabs) + panic("bio: can't allocate bios\n"); bio_integrity_init_slab(); biovec_init_slabs(); - fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); + fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); if (!fs_bio_set) panic("bio: can't allocate bios\n"); -- cgit v1.2.3 From 392ddc32982a5c661dd90dd49a3cb37f1c68b782 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Dec 2008 12:42:54 +0100 Subject: bio: add support for inlining a number of bio_vecs inside the bio When we go and allocate a bio for IO, we actually do two allocations. One for the bio itself, and one for the bi_io_vec that holds the actual pages we are interested in. This feature inlines a definable amount of io vecs inside the bio itself, so we eliminate the bio_vec array allocation for IO's up to a certain size. It defaults to 4 vecs, which is typically 16k of IO. Signed-off-by: Jens Axboe --- fs/bio.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 0146f80789e..75e6be18ecd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -31,6 +31,12 @@ DEFINE_TRACE(block_split); +/* + * Test patch to inline a certain number of bi_io_vec's inside the bio + * itself, to shrink a bio data allocation from two mempool calls to one + */ +#define BIO_INLINE_VECS 4 + static mempool_t *bio_split_pool __read_mostly; /* @@ -241,7 +247,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) { void *p; - if (bio->bi_io_vec) + if (bio_has_allocated_vec(bio)) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) @@ -267,7 +273,8 @@ static void bio_fs_destructor(struct bio *bio) static void bio_kmalloc_destructor(struct bio *bio) { - kfree(bio->bi_io_vec); + if (bio_has_allocated_vec(bio)) + kfree(bio->bi_io_vec); kfree(bio); } @@ -314,7 +321,16 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (likely(nr_iovecs)) { unsigned long uninitialized_var(idx); - bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); + if (nr_iovecs <= BIO_INLINE_VECS) { + idx = 0; + bvl = bio->bi_inline_vecs; + nr_iovecs = BIO_INLINE_VECS; + memset(bvl, 0, BIO_INLINE_VECS * sizeof(*bvl)); + } else { + bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, + bs); + nr_iovecs = bvec_nr_vecs(idx); + } if (unlikely(!bvl)) { if (bs) mempool_free(bio, bs->bio_pool); @@ -324,7 +340,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) goto out; } bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = bvec_nr_vecs(idx); + bio->bi_max_vecs = nr_iovecs; } bio->bi_io_vec = bvl; } @@ -1525,6 +1541,7 @@ void bioset_free(struct bio_set *bs) */ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) { + unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); struct bio_set *bs; bs = kzalloc(sizeof(*bs), GFP_KERNEL); @@ -1533,7 +1550,7 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) bs->front_pad = front_pad; - bs->bio_slab = bio_find_or_create_slab(front_pad); + bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); if (!bs->bio_slab) { kfree(bs); return NULL; -- cgit v1.2.3 From abf137dd7712132ee56d5b3143c2ff61a72a5faa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Dec 2008 08:11:22 +0100 Subject: aio: make the lookup_ioctx() lockless The mm->ioctx_list is currently protected by a reader-writer lock, so we always grab that lock on the read side for doing ioctx lookups. As the workload is extremely reader biased, turn this into an rcu hlist so we can make lookup_ioctx() lockless. Get rid of the rwlock and use a spinlock for providing update side exclusion. There's usually only 1 entry on this list, so it doesn't make sense to look into fancier data structures. Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- fs/aio.c | 100 +++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 56 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index f658441d566..d6f89d3c15e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ } while(0) +static void ctx_rcu_free(struct rcu_head *head) +{ + struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + unsigned nr_events = ctx->max_reqs; + + kmem_cache_free(kioctx_cachep, ctx); + + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } +} /* __put_ioctx * Called when the last user of an aio context has gone away, @@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx) */ static void __put_ioctx(struct kioctx *ctx) { - unsigned nr_events = ctx->max_reqs; - BUG_ON(ctx->reqs_active); cancel_delayed_work(&ctx->wq); @@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx) mmdrop(ctx->mm); ctx->mm = NULL; pr_debug("__put_ioctx: freeing %p\n", ctx); - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } + call_rcu(&ctx->rcu_head, ctx_rcu_free); } #define get_ioctx(kioctx) do { \ @@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) { struct mm_struct *mm; struct kioctx *ctx; + int did_sync = 0; /* Prevent overflows */ if ((nr_events > (0x10000000U / sizeof(struct io_event))) || @@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) goto out_freectx; /* limit the number of system wide aios */ - spin_lock(&aio_nr_lock); - if (aio_nr + ctx->max_reqs > aio_max_nr || - aio_nr + ctx->max_reqs < aio_nr) - ctx->max_reqs = 0; - else - aio_nr += ctx->max_reqs; - spin_unlock(&aio_nr_lock); + do { + spin_lock_bh(&aio_nr_lock); + if (aio_nr + nr_events > aio_max_nr || + aio_nr + nr_events < aio_nr) + ctx->max_reqs = 0; + else + aio_nr += ctx->max_reqs; + spin_unlock_bh(&aio_nr_lock); + if (ctx->max_reqs || did_sync) + break; + + /* wait for rcu callbacks to have completed before giving up */ + synchronize_rcu(); + did_sync = 1; + ctx->max_reqs = nr_events; + } while (1); + if (ctx->max_reqs == 0) goto out_cleanup; /* now link into global list. */ - write_lock(&mm->ioctx_list_lock); - ctx->next = mm->ioctx_list; - mm->ioctx_list = ctx; - write_unlock(&mm->ioctx_list_lock); + spin_lock(&mm->ioctx_lock); + hlist_add_head_rcu(&ctx->list, &mm->ioctx_list); + spin_unlock(&mm->ioctx_lock); dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, current->mm, ctx->ring_info.nr); @@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb) */ void exit_aio(struct mm_struct *mm) { - struct kioctx *ctx = mm->ioctx_list; - mm->ioctx_list = NULL; - while (ctx) { - struct kioctx *next = ctx->next; - ctx->next = NULL; + struct kioctx *ctx; + + while (!hlist_empty(&mm->ioctx_list)) { + ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list); + hlist_del_rcu(&ctx->list); + aio_cancel_all(ctx); wait_for_all_aios(ctx); @@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm) atomic_read(&ctx->users), ctx->dead, ctx->reqs_active); put_ioctx(ctx); - ctx = next; } } @@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req) static struct kioctx *lookup_ioctx(unsigned long ctx_id) { - struct kioctx *ioctx; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; + struct kioctx *ctx = NULL; + struct hlist_node *n; - mm = current->mm; - read_lock(&mm->ioctx_list_lock); - for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next) - if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) { - get_ioctx(ioctx); + rcu_read_lock(); + + hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { + if (ctx->user_id == ctx_id && !ctx->dead) { + get_ioctx(ctx); break; } - read_unlock(&mm->ioctx_list_lock); + } - return ioctx; + rcu_read_unlock(); + return ctx; } /* @@ -1215,19 +1232,14 @@ out: static void io_destroy(struct kioctx *ioctx) { struct mm_struct *mm = current->mm; - struct kioctx **tmp; int was_dead; /* delete the entry from the list is someone else hasn't already */ - write_lock(&mm->ioctx_list_lock); + spin_lock(&mm->ioctx_lock); was_dead = ioctx->dead; ioctx->dead = 1; - for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx; - tmp = &(*tmp)->next) - ; - if (*tmp) - *tmp = ioctx->next; - write_unlock(&mm->ioctx_list_lock); + hlist_del_rcu(&ioctx->list); + spin_unlock(&mm->ioctx_lock); dprintk("aio_release(%p)\n", ioctx); if (likely(!was_dead)) -- cgit v1.2.3 From b3a6ffe16b5cc48abe7db8d04882dc45280eb693 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Dec 2008 09:51:16 +0100 Subject: Get rid of CONFIG_LSF We have two seperate config entries for large devices/files. One is CONFIG_LBD that guards just the devices, the other is CONFIG_LSF that handles large files. This doesn't make a lot of sense, you typically want both or none. So get rid of CONFIG_LSF and change CONFIG_LBD wording to indicate that it covers both. Acked-by: Jean Delvare Signed-off-by: Jens Axboe --- fs/ext4/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e4a241c65db..04158ad74db 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) /* small i_blocks in vfs inode? */ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * CONFIG_LSF is not enabled implies the inode + * CONFIG_LBD is not enabled implies the inode * i_block represent total blocks in 512 bytes * 32 == size of vfs inode i_blocks * 8 */ @@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * !has_huge_files or CONFIG_LSF is not enabled + * !has_huge_files or CONFIG_LBD is not enabled * implies the inode i_block represent total blocks in * 512 bytes 32 == size of vfs inode i_blocks * 8 */ @@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (has_huge_files) { /* * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LSF + * mount if kernel is build with CONFIG_LBD */ if (sizeof(root->i_blocks) < sizeof(u64) && !(sb->s_flags & MS_RDONLY)) { printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " "files cannot be mounted read-write " - "without CONFIG_LSF.\n", sb->s_id); + "without CONFIG_LBD.\n", sb->s_id); goto failed_mount; } } -- cgit v1.2.3 From d3f761104b097738932afcc310fbbbbfb007ef92 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 23 Dec 2008 12:46:21 +0100 Subject: bio: get rid of bio_vec clearing We don't need to clear the memory used for adding bio_vec entries, since nobody should be looking at members unitialized. Any valid use should be below bio->bi_vcnt, and that members up until that count must be valid since they were added through bio_add_page(). Signed-off-by: Jens Axboe --- fs/bio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 75e6be18ecd..711cee10360 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -180,7 +180,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, * kzalloc() for the exact number of vecs right away. */ if (!bs) - bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); + bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); /* * see comment near bvec_array define! @@ -237,9 +237,6 @@ fallback: } } - if (bvl) - memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); - return bvl; } @@ -325,7 +322,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) idx = 0; bvl = bio->bi_inline_vecs; nr_iovecs = BIO_INLINE_VECS; - memset(bvl, 0, BIO_INLINE_VECS * sizeof(*bvl)); } else { bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); -- cgit v1.2.3 From 46f72f57d279688c4524df78edb5738db730eeef Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 30 Dec 2008 16:35:55 -0500 Subject: fs/nfs/nfs4proc.c: make nfs4_map_errors() static nfs4_map_errors() can become static. Signed-off-by: WANG Cong Cc: J. Bruce Fields Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 - fs/nfs/nfs4proc.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d5f5efaf2b2..4e4d3320437 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -189,7 +189,6 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ -extern int nfs4_map_errors(int err); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 49eebe25bba..8dde84b988d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -67,7 +67,7 @@ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); /* Prevent leaks of NFSv4 errors into userland */ -int nfs4_map_errors(int err) +static int nfs4_map_errors(int err) { if (err < -1000) { dprintk("%s could not handle NFSv4 error %d\n", -- cgit v1.2.3