From 9ccbece546cf836f67f6d9bb4bf2f70f7476cb2c Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:53:25 +1100 Subject: [XFS] Fix use-after-free with log and quotas Destroying the quota stuff on unmount can access the log - ie XFS_QM_DONE() ends up in xfs_dqunlock() which calls xfs_trans_unlocked_item() and then xfs_log_move_tail(). By this time the log has already been destroyed. Just move the cleanup of the quota code earlier in xfs_unmountfs() before the call to xfs_log_unmount(). Moving XFS_QM_DONE() up near XFS_QM_DQPURGEALL() seems like a good spot. SGI-PV: 987086 SGI-Modid: xfs-linux-melb:xfs-kern:32148a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig Signed-off-by: Peter Leckie --- fs/xfs/xfs_mount.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a4503f5e949..15f5dd22fbb 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1245,6 +1245,9 @@ xfs_unmountfs( XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); + /* * Flush out the log synchronously so that we know for sure * that nothing is pinned. This is important because bflush() @@ -1297,8 +1300,6 @@ xfs_unmountfs( xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); } STATIC void -- cgit v1.2.3 From 2cf7f0da3ae225848a2ee10d4e216448a770fd00 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:59:06 +1100 Subject: [XFS] Wait for all I/O on truncate to zero file size It's possible to have outstanding xfs_ioend_t's queued when the file size is zero. This can happen in the direct I/O path when a direct I/O write fails due to ENOSPC. In this case the xfs_ioend_t will still be queued (ie xfs_end_io_direct() does not know that the I/O failed so can't force the xfs_ioend_t to be flushed synchronously). When we truncate a file on unlink we don't know to wait for these xfs_ioend_ts and we can have a use-after-free situation if the inode is reclaimed before the xfs_ioend_t is finally processed. As was suggested by Dave Chinner lets wait for all I/Os to complete when truncating the file size to zero. SGI-PV: 981668 SGI-Modid: xfs-linux-melb:xfs-kern:32216a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dbd9cef852e..a391b955df0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1414,7 +1414,7 @@ xfs_itruncate_start( mp = ip->i_mount; /* wait for the completion of any pending DIOs */ - if (new_size < ip->i_size) + if (new_size == 0 || new_size < ip->i_size) vn_iowait(ip); /* -- cgit v1.2.3 From 6f9f51adb6ac0a49fce49e01c47dcfc2810c6e9d Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:12 +1100 Subject: [XFS] Account for allocated blocks when expanding directories When we create a directory, we reserve a number of blocks for the maximum possible expansion of of the directory due to various btree splits, freespace allocation, etc. Unfortunately, each allocation is not reflected in the total number of blocks still available to the transaction, so the maximal reservation is used over and over again. This leads to problems where an allocation group has only enough blocks for *some* of the allocations required for the directory modification. After the first N allocations, the remaining blocks in the allocation group drops below the total reservation, and subsequent allocations fail because the allocator will not allow the allocation to proceed if the AG does not have the enough blocks available for the entire allocation total. This results in an ENOSPC occurring after an allocation has already occurred. This results in aborting the directory operation (leaving the directory in an inconsistent state) and cancelling a dirty transaction, which results in a filesystem shutdown. Avoid the problem by reflecting the number of blocks allocated in any directory expansion in the total number of blocks available to the modification in progress. This prevents a directory modification from being aborted part way through with an ENOSPC. SGI-PV: 988144 SGI-Modid: xfs-linux-melb:xfs-kern:32340a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_da_btree.c | 5 +++++ fs/xfs/xfs_dir2.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9e561a9cefc..a11a8390bf6 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) int nmap, error, w, count, c, got, i, mapi; xfs_trans_t *tp; xfs_mount_t *mp; + xfs_drfsbno_t nblks; dp = args->dp; mp = dp->i_mount; w = args->whichfork; tp = args->trans; + nblks = dp->i_d.di_nblocks; + /* * For new directories adjust the file offset and block count. */ @@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) } if (mapp != &map) kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *new_blkno = (xfs_dablk_t)bno; return 0; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 80e0dc51361..1afb12278b8 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -525,11 +525,13 @@ xfs_dir2_grow_inode( xfs_mount_t *mp; int nmap; /* number of bmap entries */ xfs_trans_t *tp; + xfs_drfsbno_t nblks; xfs_dir2_trace_args_s("grow_inode", args, space); dp = args->dp; tp = args->trans; mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ @@ -622,7 +624,11 @@ xfs_dir2_grow_inode( */ if (mapp != &map) kmem_free(mapp); + + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + /* * Update file's size if this is the data space and it grew. */ -- cgit v1.2.3 From 8f330f5149ef41ff943b04d914406cc417f62784 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 16:50:24 +1100 Subject: [XFS] handle memory allocation failures during log initialisation When there is no memory left in the system, xfs_buf_get_noaddr() can fail. If this happens at mount time during xlog_alloc_log() we fail to catch the error and oops. Catch the error from xfs_buf_get_noaddr(), and allow other memory allocations to fail and catch those errors too. Report the error to the console and fail the mount with ENOMEM. Tested by manually injecting errors into xfs_buf_get_noaddr() and xlog_alloc_log(). Version 2: o remove unnecessary casts of the returned pointer from kmem_zalloc() SGI-PV: 987246 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_log.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0b02c644355..3608a0f0a5f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -563,6 +563,11 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; + goto out; + } /* * Initialize the AIL now we have a log. @@ -601,6 +606,7 @@ xfs_log_mount( return 0; error: xfs_log_unmount_dealloc(mp); +out: return error; } /* xfs_log_mount */ @@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp, int i; int iclogsize; - log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); + log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); @@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp, iclogsize = log->l_iclog_size; ASSERT(log->l_iclog_size >= 4096); for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = (xlog_in_core_t *) - kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); + *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); + if (!*iclogp) + goto out_free_iclog; + iclog = *iclogp; iclog->ic_prev = prev_iclog; prev_iclog = iclog; bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); @@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ return log; + +out_free_iclog: + for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { + prev_iclog = iclog->ic_next; + if (iclog->ic_bp) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); + xfs_buf_free(iclog->ic_bp); + xlog_trace_iclog_dealloc(iclog); + } + kmem_free(iclog); + } + spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); + xlog_trace_loggrant_dealloc(log); + xfs_buf_free(log->l_xbuf); +out_free_log: + kmem_free(log); + return NULL; } /* xlog_alloc_log */ -- cgit v1.2.3 From 220ca310a53200b4bfbc7c4c6e365eea284ec44f Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:40:09 +1100 Subject: [XFS] XFS: Check for valid transaction headers in recovery When we are about to add a new item to a transaction in recovery, we need to check that it is valid first. Currently we just assert that header magic number matches, but in production systems that is not present and we add a corrupted transaction to the list to be processed. This results in a kernel oops later when processing the corrupted transaction. Instead, if we detect a corrupted transaction, abort recovery and leave the user to clean up the mess that has occurred. SGI-PV: 988145 SGI-Modid: xfs-linux-melb:xfs-kern:32356a Signed-off-by: David Chinner Signed-off-by: Tim Shimmin Signed-off-by: Eric Sandeen Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_log_recover.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82d46ce69d5..70e3ba32e6b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1419,7 +1419,13 @@ xlog_recover_add_to_trans( return 0; item = trans->r_itemq; if (item == NULL) { - ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xlog_warn("XFS: xlog_recover_add_to_trans: " + "bad header magic number"); + ASSERT(0); + return XFS_ERROR(EIO); + } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); /* d, s, l */ -- cgit v1.2.3