From c56c9631cbe88f08854a56ff9776c1f310916830 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Oct 2009 04:03:46 +0000 Subject: xfs: fix mmap_sem/iolock inversion in xfs_free_eofblocks When xfs_free_eofblocks is called from ->release the VM might already hold the mmap_sem, but in the write path we take the iolock before taking the mmap_sem in the generic write code. Switch xfs_free_eofblocks to only trylock the iolock if called from ->release and skip trimming the prellocated blocks in that case. We'll still free them later on the final iput. Signed-off-by: Christoph Hellwig Reviewed-by: Alex Elder Signed-off-by: Alex Elder --- fs/xfs/xfs_rw.h | 7 ------- fs/xfs/xfs_vnodeops.c | 34 ++++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index f5e4874c37d..726014d1c92 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -36,13 +36,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); } -/* - * Flags for xfs_free_eofblocks - */ -#define XFS_FREE_EOF_LOCK (1<<0) -#define XFS_FREE_EOF_NOLOCK (1<<1) - - /* * helper function to extract extent size hint from inode */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 3fac146b3b7..d98401470cf 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -708,6 +708,11 @@ xfs_fsync( return error; } +/* + * Flags for xfs_free_eofblocks + */ +#define XFS_FREE_EOF_TRYLOCK (1<<0) + /* * This is called by xfs_inactive to free any blocks beyond eof * when the link count isn't zero and by xfs_dm_punch_hole() when @@ -726,7 +731,6 @@ xfs_free_eofblocks( xfs_filblks_t map_len; int nimaps; xfs_bmbt_irec_t imap; - int use_iolock = (flags & XFS_FREE_EOF_LOCK); /* * Figure out if there are any blocks beyond the end @@ -768,14 +772,19 @@ xfs_free_eofblocks( * cache and we can't * do that within a transaction. */ - if (use_iolock) + if (flags & XFS_FREE_EOF_TRYLOCK) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + xfs_trans_cancel(tp, 0); + return 0; + } + } else { xfs_ilock(ip, XFS_IOLOCK_EXCL); + } error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, ip->i_size); if (error) { xfs_trans_cancel(tp, 0); - if (use_iolock) - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } @@ -812,8 +821,7 @@ xfs_free_eofblocks( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); } - xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) - : XFS_ILOCK_EXCL)); + xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL); } return error; } @@ -1113,7 +1121,17 @@ xfs_release( (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + + /* + * If we can't get the iolock just skip truncating + * the blocks past EOF because we could deadlock + * with the mmap_sem otherwise. We'll get another + * chance to drop them once the last reference to + * the inode is dropped, so we'll never leak blocks + * permanently. + */ + error = xfs_free_eofblocks(mp, ip, + XFS_FREE_EOF_TRYLOCK); if (error) return error; } @@ -1184,7 +1202,7 @@ xfs_inactive( (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (ip->i_delayed_blks != 0)))) { - error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); + error = xfs_free_eofblocks(mp, ip, 0); if (error) return VN_INACTIVE_CACHE; } -- cgit v1.2.3